索引 _ | A | B | C | D | E | F | G | H | I | J | K | L | M | N | O | P | Q | R | S | T | U | V | W | X | Z _ _get_server_weights() (torchrl.collectors.distributed.DistributedWeightUpdater 方法) (torchrl.collectors.distributed.RPCWeightUpdater 方法) (torchrl.collectors.llm.vLLMUpdater 方法) (torchrl.collectors.RayWeightUpdater 方法) _maybe_map_weights() (torchrl.collectors.distributed.DistributedWeightUpdater 方法) (torchrl.collectors.distributed.RPCWeightUpdater 方法) (torchrl.collectors.llm.vLLMUpdater 方法) (torchrl.collectors.RayWeightUpdater 方法) _setup() (torchrl.envs.ProcessorAsyncEnvPool 方法) (torchrl.envs.ThreadingAsyncEnvPool 方法) _skip_update() (torchrl.collectors.RayWeightUpdater 方法) _sync_weights_with_worker() (torchrl.collectors.distributed.DistributedWeightUpdater 方法) (torchrl.collectors.distributed.RPCWeightUpdater 方法) (torchrl.collectors.llm.vLLMUpdater 方法) (torchrl.collectors.RayWeightUpdater 方法) A A2CLoss (torchrl.objectives 中的類) action_key (torchrl.envs.AsyncEnvPool 屬性) (torchrl.envs.ChessEnv 屬性) (torchrl.envs.EnvBase 屬性) (torchrl.envs.GymLikeEnv 屬性) (torchrl.envs.llm.ChatEnv 屬性) (torchrl.envs.llm.DatasetChatEnv 屬性) (torchrl.envs.llm.GSM8KEnv 屬性) (torchrl.envs.llm.IFEvalEnv 屬性) (torchrl.envs.llm.LLMEnv 屬性) (torchrl.envs.llm.LLMHashingEnv 屬性) (torchrl.envs.llm.MLGymWrapper 屬性) (torchrl.envs.LLMHashingEnv 屬性) (torchrl.envs.ParallelEnv 屬性) (torchrl.envs.PendulumEnv 屬性) (torchrl.envs.ProcessorAsyncEnvPool 屬性) (torchrl.envs.SerialEnv 屬性) (torchrl.envs.ThreadingAsyncEnvPool 屬性) (torchrl.envs.TicTacToeEnv 屬性) action_keys (torchrl.data.MCTSForest 屬性) (torchrl.envs.AsyncEnvPool 屬性) (torchrl.envs.ChessEnv 屬性) (torchrl.envs.EnvBase 屬性) (torchrl.envs.GymLikeEnv 屬性) (torchrl.envs.llm.ChatEnv 屬性) (torchrl.envs.llm.DatasetChatEnv 屬性) (torchrl.envs.llm.GSM8KEnv 屬性) (torchrl.envs.llm.IFEvalEnv 屬性) (torchrl.envs.llm.LLMEnv 屬性) (torchrl.envs.llm.LLMHashingEnv 屬性) (torchrl.envs.llm.MLGymWrapper 屬性) (torchrl.envs.LLMHashingEnv 屬性) (torchrl.envs.ParallelEnv 屬性) (torchrl.envs.PendulumEnv 屬性) (torchrl.envs.ProcessorAsyncEnvPool 屬性) (torchrl.envs.SerialEnv 屬性) (torchrl.envs.ThreadingAsyncEnvPool 屬性) (torchrl.envs.TicTacToeEnv 屬性) action_spec (torchrl.envs.AsyncEnvPool 屬性) (torchrl.envs.ChessEnv 屬性) (torchrl.envs.EnvBase 屬性) (torchrl.envs.GymLikeEnv 屬性) (torchrl.envs.llm.ChatEnv 屬性) (torchrl.envs.llm.DatasetChatEnv 屬性) (torchrl.envs.llm.GSM8KEnv 屬性) (torchrl.envs.llm.IFEvalEnv 屬性) (torchrl.envs.llm.LLMEnv 屬性) (torchrl.envs.llm.LLMHashingEnv 屬性) (torchrl.envs.llm.MLGymWrapper 屬性) (torchrl.envs.LLMHashingEnv 屬性) (torchrl.envs.ParallelEnv 屬性) (torchrl.envs.PendulumEnv 屬性) (torchrl.envs.ProcessorAsyncEnvPool 屬性) (torchrl.envs.SerialEnv 屬性) (torchrl.envs.ThreadingAsyncEnvPool 屬性) (torchrl.envs.TicTacToeEnv 屬性) action_spec_unbatched (torchrl.envs.AsyncEnvPool 屬性) (torchrl.envs.ChessEnv 屬性) (torchrl.envs.EnvBase 屬性) (torchrl.envs.GymLikeEnv 屬性) (torchrl.envs.llm.ChatEnv 屬性) (torchrl.envs.llm.DatasetChatEnv 屬性) (torchrl.envs.llm.GSM8KEnv 屬性) (torchrl.envs.llm.IFEvalEnv 屬性) (torchrl.envs.llm.LLMEnv 屬性) (torchrl.envs.llm.LLMHashingEnv 屬性) (torchrl.envs.llm.MLGymWrapper 屬性) (torchrl.envs.LLMHashingEnv 屬性) (torchrl.envs.ParallelEnv 屬性) (torchrl.envs.PendulumEnv 屬性) (torchrl.envs.ProcessorAsyncEnvPool 屬性) (torchrl.envs.SerialEnv 屬性) (torchrl.envs.ThreadingAsyncEnvPool 屬性) (torchrl.envs.TicTacToeEnv 屬性) ActionDiscretizer (torchrl.envs.transforms 中的類) ActionDiscretizer.SamplingStrategy (torchrl.envs.transforms 中的類) ActionDiscretizerConfig (torchrl.trainers.algorithms.configs.transforms 中的類) ActionMask (torchrl.envs.transforms 中的類) ActionMaskConfig (torchrl.trainers.algorithms.configs.transforms 中的類) Actor (torchrl.modules.tensordict_module 中的類) actor_loss() (torchrl.objectives.CrossQLoss 方法) (torchrl.objectives.DiscreteSACLoss 方法) (torchrl.objectives.SACLoss 方法) (torchrl.objectives.TD3BCLoss 方法) ActorCriticOperator (torchrl.modules.tensordict_module 中的類) ActorCriticWrapper (torchrl.modules.tensordict_module 中的類) ActorValueOperator (torchrl.modules.tensordict_module 中的類) AdadeltaConfig (torchrl.trainers.algorithms.configs.utils 中的類) AdagradConfig (torchrl.trainers.algorithms.configs.utils 中的類) AdamaxConfig (torchrl.trainers.algorithms.configs.utils 中的類) AdamConfig (torchrl.trainers.algorithms.configs.utils 中的類) AdamWConfig (torchrl.trainers.algorithms.configs.utils 中的類) AdaptiveKLController (torchrl.data 中的類) add() (torchrl.data.datasets.AtariDQNExperienceReplay 方法) (torchrl.data.datasets.BaseDatasetExperienceReplay 方法) (torchrl.data.datasets.D4RLExperienceReplay 方法) (torchrl.data.datasets.GenDGRLExperienceReplay 方法) (torchrl.data.datasets.MinariExperienceReplay 方法) (torchrl.data.datasets.OpenMLExperienceReplay 方法) (torchrl.data.datasets.OpenXExperienceReplay 方法) (torchrl.data.datasets.RobosetExperienceReplay 方法) (torchrl.data.datasets.VD4RLExperienceReplay 方法) (torchrl.data.PrioritizedReplayBuffer 方法) (torchrl.data.RayReplayBuffer 方法) (torchrl.data.RemoteTensorDictReplayBuffer 方法) (torchrl.data.replay_buffers.ImmutableDatasetWriter 方法) (torchrl.data.replay_buffers.ReplayBufferEnsemble 方法) (torchrl.data.replay_buffers.RoundRobinWriter 方法) (torchrl.data.replay_buffers.TensorDictMaxValueWriter 方法) (torchrl.data.replay_buffers.TensorDictRoundRobinWriter 方法) (torchrl.data.replay_buffers.Writer 方法) (torchrl.data.replay_buffers.WriterEnsemble 方法) (torchrl.data.ReplayBuffer 方法) (torchrl.data.TensorDictPrioritizedReplayBuffer 方法) (torchrl.data.TensorDictReplayBuffer 方法) add_collectors() (torchrl.collectors.distributed.RayCollector 方法) add_module() (torchrl.data.BinaryToDecimal 方法) (torchrl.data.DensifyReward 方法) (torchrl.data.HashToInt 方法) (torchrl.data.llm.TopKRewardSelector 方法) (torchrl.data.MultiStep 方法) (torchrl.data.QueryModule 方法) (torchrl.data.RandomProjectionHash 方法) (torchrl.data.SipHash 方法) (torchrl.data.TensorDictMap 方法) (torchrl.envs.AsyncEnvPool 方法) (torchrl.envs.ChessEnv 方法) (torchrl.envs.EnvBase 方法) (torchrl.envs.GymLikeEnv 方法) (torchrl.envs.llm.ChatEnv 方法) (torchrl.envs.llm.DatasetChatEnv 方法) (torchrl.envs.llm.GSM8KEnv 方法) (torchrl.envs.llm.GSM8KPrepareQuestion 方法) (torchrl.envs.llm.GSM8KRewardParser 方法) (torchrl.envs.llm.IFEvalEnv 方法) (torchrl.envs.llm.IfEvalScorer 方法) (torchrl.envs.llm.LLMEnv 方法) (torchrl.envs.llm.LLMHashingEnv 方法) (torchrl.envs.llm.MLGymWrapper 方法) (torchrl.envs.llm.transforms.AddThinkingPrompt 方法) (torchrl.envs.llm.transforms.BrowserTransform 方法) (torchrl.envs.llm.transforms.DataLoadingPrimer 方法) (torchrl.envs.llm.transforms.KLComputation 方法) (torchrl.envs.llm.transforms.KLRewardTransform 方法) (torchrl.envs.llm.transforms.MCPToolTransform 方法) (torchrl.envs.llm.transforms.PolicyVersion 方法) (torchrl.envs.llm.transforms.PythonInterpreter 方法) (torchrl.envs.llm.transforms.RayDataLoadingPrimer 方法) (torchrl.envs.llm.transforms.RetrieveKL 方法) (torchrl.envs.llm.transforms.RetrieveLogProb 方法) (torchrl.envs.llm.transforms.TemplateTransform 方法) (torchrl.envs.llm.transforms.Tokenizer 方法) (torchrl.envs.LLMHashingEnv 方法) (torchrl.envs.ParallelEnv 方法) (torchrl.envs.PendulumEnv 方法) (torchrl.envs.ProcessorAsyncEnvPool 方法) (torchrl.envs.SerialEnv 方法) (torchrl.envs.ThreadingAsyncEnvPool 方法) (torchrl.envs.TicTacToeEnv 方法) (torchrl.envs.transforms.rb_transforms.MultiStepTransform 方法) (torchrl.modules.llm.LLMWrapperBase 方法) (torchrl.modules.llm.TransformersWrapper 方法) (torchrl.modules.llm.vLLMWrapper 方法) (torchrl.objectives.llm.GRPOLoss 方法) (torchrl.objectives.llm.MCAdvantage 方法) (torchrl.objectives.llm.SFTLoss 方法) add_random_module (torchrl.objectives 中的類) add_truncated_keys() (torchrl.envs.AsyncEnvPool 方法) (torchrl.envs.ChessEnv 方法) (torchrl.envs.EnvBase 方法) (torchrl.envs.GymLikeEnv 方法) (torchrl.envs.llm.ChatEnv 方法) (torchrl.envs.llm.DatasetChatEnv 方法) (torchrl.envs.llm.GSM8KEnv 方法) (torchrl.envs.llm.IFEvalEnv 方法) (torchrl.envs.llm.LLMEnv 方法) (torchrl.envs.llm.LLMHashingEnv 方法) (torchrl.envs.llm.MLGymWrapper 方法) (torchrl.envs.LLMHashingEnv 方法) (torchrl.envs.ParallelEnv 方法) (torchrl.envs.PendulumEnv 方法) (torchrl.envs.ProcessorAsyncEnvPool 方法) (torchrl.envs.SerialEnv 方法) (torchrl.envs.ThreadingAsyncEnvPool 方法) (torchrl.envs.TicTacToeEnv 方法) (torchrl.envs.transforms.TransformedEnv 方法) AdditiveGaussianModule (torchrl.modules 中的類) AddThinkingPrompt (torchrl.envs.llm.transforms 中的類) all_actions() (torchrl.envs.AsyncEnvPool 方法) (torchrl.envs.ChessEnv 方法) (torchrl.envs.EnvBase 方法) (torchrl.envs.GymLikeEnv 方法) (torchrl.envs.llm.ChatEnv 方法) (torchrl.envs.llm.DatasetChatEnv 方法) (torchrl.envs.llm.GSM8KEnv 方法) (torchrl.envs.llm.IFEvalEnv 方法) (torchrl.envs.llm.LLMEnv 方法) (torchrl.envs.llm.LLMHashingEnv 方法) (torchrl.envs.llm.MLGymWrapper 方法) (torchrl.envs.LLMHashingEnv 方法) (torchrl.envs.ParallelEnv 方法) (torchrl.envs.PendulumEnv 方法) (torchrl.envs.ProcessorAsyncEnvPool 方法) (torchrl.envs.SerialEnv 方法) (torchrl.envs.ThreadingAsyncEnvPool 方法) (torchrl.envs.TicTacToeEnv 方法) all_worker_ids() (torchrl.collectors.distributed.DistributedWeightUpdater 方法), [1] (torchrl.collectors.distributed.RPCWeightUpdater 方法), [1] (torchrl.collectors.llm.vLLMUpdater 方法), [1] (torchrl.collectors.llm.vLLMUpdaterV2 方法) (torchrl.collectors.MultiProcessedWeightUpdater 方法) (torchrl.collectors.RayWeightUpdater 方法), [1] (torchrl.collectors.VanillaWeightUpdater 方法) (torchrl.collectors.WeightUpdaterBase 方法) alpha_loss() (torchrl.objectives.CrossQLoss 方法) (torchrl.objectives.DiscreteSACLoss 方法) (torchrl.objectives.SACLoss 方法) any_done() (torchrl.envs.AsyncEnvPool 方法) (torchrl.envs.ChessEnv 方法) (torchrl.envs.EnvBase 方法) (torchrl.envs.GymLikeEnv 方法) (torchrl.envs.llm.ChatEnv 方法) (torchrl.envs.llm.DatasetChatEnv 方法) (torchrl.envs.llm.GSM8KEnv 方法) (torchrl.envs.llm.IFEvalEnv 方法) (torchrl.envs.llm.LLMEnv 方法) (torchrl.envs.llm.LLMHashingEnv 方法) (torchrl.envs.llm.MLGymWrapper 方法) (torchrl.envs.LLMHashingEnv 方法) (torchrl.envs.ParallelEnv 方法) (torchrl.envs.PendulumEnv 方法) (torchrl.envs.ProcessorAsyncEnvPool 方法) (torchrl.envs.SerialEnv 方法) (torchrl.envs.ThreadingAsyncEnvPool 方法) (torchrl.envs.TicTacToeEnv 方法) append() (torchrl.data.llm.History 方法) (torchrl.envs.llm.transforms.RetrieveKL 方法) (torchrl.envs.transforms.Compose 方法) append_transform() (torchrl.data.datasets.AtariDQNExperienceReplay 方法) (torchrl.data.datasets.BaseDatasetExperienceReplay 方法) (torchrl.data.datasets.D4RLExperienceReplay 方法) (torchrl.data.datasets.GenDGRLExperienceReplay 方法) (torchrl.data.datasets.MinariExperienceReplay 方法) (torchrl.data.datasets.OpenMLExperienceReplay 方法) (torchrl.data.datasets.OpenXExperienceReplay 方法) (torchrl.data.datasets.RobosetExperienceReplay 方法) (torchrl.data.datasets.VD4RLExperienceReplay 方法) (torchrl.data.PrioritizedReplayBuffer 方法) (torchrl.data.RayReplayBuffer 方法) (torchrl.data.RemoteTensorDictReplayBuffer 方法) (torchrl.data.replay_buffers.ReplayBufferEnsemble 方法) (torchrl.data.ReplayBuffer 方法) (torchrl.data.TensorDictPrioritizedReplayBuffer 方法) (torchrl.data.TensorDictReplayBuffer 方法) (torchrl.envs.AsyncEnvPool 方法) (torchrl.envs.ChessEnv 方法) (torchrl.envs.EnvBase 方法) (torchrl.envs.GymLikeEnv 方法) (torchrl.envs.llm.ChatEnv 方法) (torchrl.envs.llm.DatasetChatEnv 方法) (torchrl.envs.llm.GSM8KEnv 方法) (torchrl.envs.llm.IFEvalEnv 方法) (torchrl.envs.llm.LLMEnv 方法) (torchrl.envs.llm.LLMHashingEnv 方法) (torchrl.envs.llm.MLGymWrapper 方法) (torchrl.envs.LLMHashingEnv 方法) (torchrl.envs.ParallelEnv 方法) (torchrl.envs.PendulumEnv 方法) (torchrl.envs.ProcessorAsyncEnvPool 方法) (torchrl.envs.SerialEnv 方法) (torchrl.envs.ThreadingAsyncEnvPool 方法) (torchrl.envs.TicTacToeEnv 方法) (torchrl.envs.transforms.TransformedEnv 方法) apply() (torchrl.data.BinaryToDecimal 方法) (torchrl.data.DensifyReward 方法) (torchrl.data.HashToInt 方法) (torchrl.data.llm.TopKRewardSelector 方法) (torchrl.data.MultiStep 方法) (torchrl.data.QueryModule 方法) (torchrl.data.RandomProjectionHash 方法) (torchrl.data.SipHash 方法) (torchrl.data.TensorDictMap 方法) (torchrl.envs.AsyncEnvPool 方法) (torchrl.envs.ChessEnv 方法) (torchrl.envs.EnvBase 方法) (torchrl.envs.GymLikeEnv 方法) (torchrl.envs.llm.ChatEnv 方法) (torchrl.envs.llm.DatasetChatEnv 方法) (torchrl.envs.llm.GSM8KEnv 方法) (torchrl.envs.llm.GSM8KPrepareQuestion 方法) (torchrl.envs.llm.GSM8KRewardParser 方法) (torchrl.envs.llm.IFEvalEnv 方法) (torchrl.envs.llm.IfEvalScorer 方法) (torchrl.envs.llm.LLMEnv 方法) (torchrl.envs.llm.LLMHashingEnv 方法) (torchrl.envs.llm.MLGymWrapper 方法) (torchrl.envs.llm.transforms.AddThinkingPrompt 方法) (torchrl.envs.llm.transforms.BrowserTransform 方法) (torchrl.envs.llm.transforms.DataLoadingPrimer 方法) (torchrl.envs.llm.transforms.KLComputation 方法) (torchrl.envs.llm.transforms.KLRewardTransform 方法) (torchrl.envs.llm.transforms.MCPToolTransform 方法) (torchrl.envs.llm.transforms.PolicyVersion 方法) (torchrl.envs.llm.transforms.PythonInterpreter 方法) (torchrl.envs.llm.transforms.RayDataLoadingPrimer 方法) (torchrl.envs.llm.transforms.RetrieveKL 方法) (torchrl.envs.llm.transforms.RetrieveLogProb 方法) (torchrl.envs.llm.transforms.TemplateTransform 方法) (torchrl.envs.llm.transforms.Tokenizer 方法) (torchrl.envs.LLMHashingEnv 方法) (torchrl.envs.ParallelEnv 方法) (torchrl.envs.PendulumEnv 方法) (torchrl.envs.ProcessorAsyncEnvPool 方法) (torchrl.envs.SerialEnv 方法) (torchrl.envs.ThreadingAsyncEnvPool 方法) (torchrl.envs.TicTacToeEnv 方法) (torchrl.envs.transforms.rb_transforms.MultiStepTransform 方法) (torchrl.modules.llm.LLMWrapperBase 方法) (torchrl.modules.llm.TransformersWrapper 方法) (torchrl.modules.llm.vLLMWrapper 方法) (torchrl.objectives.llm.GRPOLoss 方法) (torchrl.objectives.llm.MCAdvantage 方法) (torchrl.objectives.llm.SFTLoss 方法) apply_chat_template() (torchrl.data.llm.History 方法) as_nested_tensor (torchrl.envs.llm.transforms 中的類) as_padded_tensor (torchrl.envs.llm.transforms 中的類) as_remote() (torchrl.collectors.llm.LLMCollector 類方法) (torchrl.collectors.llm.RayLLMCollector 類方法) (torchrl.data.datasets.AtariDQNExperienceReplay 類方法) (torchrl.data.datasets.BaseDatasetExperienceReplay 類方法) (torchrl.data.datasets.D4RLExperienceReplay 類方法) (torchrl.data.datasets.GenDGRLExperienceReplay 類方法) (torchrl.data.datasets.MinariExperienceReplay 類方法) (torchrl.data.datasets.OpenMLExperienceReplay 類方法) (torchrl.data.datasets.OpenXExperienceReplay 類方法) (torchrl.data.datasets.RobosetExperienceReplay 類方法) (torchrl.data.datasets.VD4RLExperienceReplay 類方法) (torchrl.data.PrioritizedReplayBuffer 類方法) (torchrl.data.RayReplayBuffer 類方法) (torchrl.data.RemoteTensorDictReplayBuffer 類方法) (torchrl.data.replay_buffers.ReplayBufferEnsemble 類方法) (torchrl.data.ReplayBuffer 類方法) (torchrl.data.TensorDictPrioritizedReplayBuffer 類方法) (torchrl.data.TensorDictReplayBuffer 類方法) ASGDConfig (torchrl.trainers.algorithms.configs.utils 中的類) assert_is_in() (torchrl.data.Binary 方法) (torchrl.data.BinaryDiscreteTensorSpec 方法) (torchrl.data.Bounded 方法) (torchrl.data.BoundedTensorSpec 方法) (torchrl.data.Categorical 方法) (torchrl.data.Composite 方法) (torchrl.data.CompositeSpec 方法) (torchrl.data.DiscreteTensorSpec 方法) (torchrl.data.LazyStackedCompositeSpec 方法) (torchrl.data.LazyStackedTensorSpec 方法) (torchrl.data.MultiCategorical 方法) (torchrl.data.MultiDiscreteTensorSpec 方法) (torchrl.data.MultiOneHot 方法) (torchrl.data.MultiOneHotDiscreteTensorSpec 方法) (torchrl.data.NonTensor 方法) (torchrl.data.NonTensorSpec 方法) (torchrl.data.OneHot 方法) (torchrl.data.OneHotDiscreteTensorSpec 方法) (torchrl.data.Stacked 方法) (torchrl.data.StackedComposite 方法) (torchrl.data.TensorSpec 方法) (torchrl.data.Unbounded 方法) (torchrl.data.UnboundedContinuous 方法) (torchrl.data.UnboundedContinuousTensorSpec 方法) (torchrl.data.UnboundedDiscrete 方法) (torchrl.data.UnboundedDiscreteTensorSpec 方法) async_reset_recv() (torchrl.envs.ProcessorAsyncEnvPool 方法) (torchrl.envs.ThreadingAsyncEnvPool 方法) async_reset_send() (torchrl.envs.ProcessorAsyncEnvPool 方法) (torchrl.envs.ThreadingAsyncEnvPool 方法) async_shutdown() (torchrl.collectors.aSyncDataCollector 方法) (torchrl.collectors.DataCollectorBase 方法) (torchrl.collectors.distributed.DistributedDataCollector 方法) (torchrl.collectors.distributed.DistributedSyncDataCollector 方法) (torchrl.collectors.distributed.RayCollector 方法) (torchrl.collectors.distributed.RPCDataCollector 方法) (torchrl.collectors.llm.LLMCollector 方法) (torchrl.collectors.llm.RayLLMCollector 方法) (torchrl.collectors.MultiaSyncDataCollector 方法) (torchrl.collectors.MultiSyncDataCollector 方法) (torchrl.collectors.SyncDataCollector 方法) async_step_recv() (torchrl.envs.ProcessorAsyncEnvPool 方法) (torchrl.envs.ThreadingAsyncEnvPool 方法) async_step_send() (torchrl.envs.ProcessorAsyncEnvPool 方法) (torchrl.envs.ThreadingAsyncEnvPool 方法) aSyncDataCollector (torchrl.collectors 中的類) AsyncDataCollectorConfig (torchrl.trainers.algorithms.configs.collectors 中的類) AsyncEnvPool (torchrl.envs 中的類) AsyncVLLM (torchrl.modules.llm 中的類) AtariDQNExperienceReplay (torchrl.data.datasets 中的類) attach() (torchrl.data.replay_buffers.CompressedListStorage 方法) (torchrl.data.replay_buffers.LazyMemmapStorage 方法) (torchrl.data.replay_buffers.LazyStackStorage 方法) (torchrl.data.replay_buffers.LazyTensorStorage 方法) (torchrl.data.replay_buffers.ListStorage 方法) (torchrl.data.replay_buffers.Storage 方法) (torchrl.data.replay_buffers.StorageEnsemble 方法) (torchrl.data.replay_buffers.TensorStorage 方法) auto_register_info_dict() (torchrl.envs.GymLikeEnv 方法) (torchrl.envs.llm.MLGymWrapper 方法) auto_specs_() (torchrl.envs.AsyncEnvPool 方法) (torchrl.envs.ChessEnv 方法) (torchrl.envs.EnvBase 方法) (torchrl.envs.GymLikeEnv 方法) (torchrl.envs.llm.ChatEnv 方法) (torchrl.envs.llm.DatasetChatEnv 方法) (torchrl.envs.llm.GSM8KEnv 方法) (torchrl.envs.llm.IFEvalEnv 方法) (torchrl.envs.llm.LLMEnv 方法) (torchrl.envs.llm.LLMHashingEnv 方法) (torchrl.envs.llm.MLGymWrapper 方法) (torchrl.envs.LLMHashingEnv 方法) (torchrl.envs.ParallelEnv 方法) (torchrl.envs.PendulumEnv 方法) (torchrl.envs.ProcessorAsyncEnvPool 方法) (torchrl.envs.SerialEnv 方法) (torchrl.envs.ThreadingAsyncEnvPool 方法) (torchrl.envs.TicTacToeEnv 方法) auto_unwrap_transformed_env (torchrl 中的類) AutoResetEnv (torchrl.envs.transforms 中的類) AutoResetTransform (torchrl.envs.transforms 中的類) AutoResetTransformConfig (torchrl.trainers.algorithms.configs.transforms 中的類) B base_env (torchrl.envs.llm.transforms.RayDataLoadingPrimer 屬性) BaseDatasetExperienceReplay (torchrl.data.datasets 中的類) batch_dims (torchrl.envs.AsyncEnvPool 屬性) (torchrl.envs.ChessEnv 屬性) (torchrl.envs.EnvBase 屬性) (torchrl.envs.GymLikeEnv 屬性) (torchrl.envs.llm.ChatEnv 屬性) (torchrl.envs.llm.DatasetChatEnv 屬性) (torchrl.envs.llm.GSM8KEnv 屬性) (torchrl.envs.llm.IFEvalEnv 屬性) (torchrl.envs.llm.LLMEnv 屬性) (torchrl.envs.llm.LLMHashingEnv 屬性) (torchrl.envs.llm.MLGymWrapper 屬性) (torchrl.envs.LLMHashingEnv 屬性) (torchrl.envs.ParallelEnv 屬性) (torchrl.envs.PendulumEnv 屬性) (torchrl.envs.ProcessorAsyncEnvPool 屬性) (torchrl.envs.SerialEnv 屬性) (torchrl.envs.ThreadingAsyncEnvPool 屬性) (torchrl.envs.TicTacToeEnv 屬性) batch_locked (torchrl.envs.AsyncEnvPool 屬性) (torchrl.envs.ChessEnv 屬性) (torchrl.envs.EnvBase 屬性) (torchrl.envs.GymLikeEnv 屬性) (torchrl.envs.llm.ChatEnv 屬性) (torchrl.envs.llm.DatasetChatEnv 屬性) (torchrl.envs.llm.GSM8KEnv 屬性) (torchrl.envs.llm.IFEvalEnv 屬性) (torchrl.envs.llm.LLMEnv 屬性) (torchrl.envs.llm.LLMHashingEnv 屬性) (torchrl.envs.llm.MLGymWrapper 屬性) (torchrl.envs.LLMHashingEnv 屬性) (torchrl.envs.ParallelEnv 屬性) (torchrl.envs.ProcessorAsyncEnvPool 屬性) (torchrl.envs.SerialEnv 屬性) (torchrl.envs.ThreadingAsyncEnvPool 屬性) (torchrl.envs.transforms.TransformedEnv 屬性) batch_size (torchrl.data.datasets.AtariDQNExperienceReplay 屬性) (torchrl.data.datasets.BaseDatasetExperienceReplay 屬性) (torchrl.data.datasets.D4RLExperienceReplay 屬性) (torchrl.data.datasets.GenDGRLExperienceReplay 屬性) (torchrl.data.datasets.MinariExperienceReplay 屬性) (torchrl.data.datasets.OpenMLExperienceReplay 屬性) (torchrl.data.datasets.OpenXExperienceReplay 屬性) (torchrl.data.datasets.RobosetExperienceReplay 屬性) (torchrl.data.datasets.VD4RLExperienceReplay 屬性) (torchrl.data.PrioritizedReplayBuffer 屬性) (torchrl.data.RayReplayBuffer 屬性) (torchrl.data.RemoteTensorDictReplayBuffer 屬性) (torchrl.data.replay_buffers.ReplayBufferEnsemble 屬性) (torchrl.data.ReplayBuffer 屬性) (torchrl.data.TensorDictPrioritizedReplayBuffer 屬性) (torchrl.data.TensorDictReplayBuffer 屬性) (torchrl.envs.AsyncEnvPool 屬性) (torchrl.envs.ChessEnv 屬性) (torchrl.envs.EnvBase 屬性) (torchrl.envs.GymLikeEnv 屬性) (torchrl.envs.llm.ChatEnv 屬性) (torchrl.envs.llm.DatasetChatEnv 屬性) (torchrl.envs.llm.GSM8KEnv 屬性) (torchrl.envs.llm.IFEvalEnv 屬性) (torchrl.envs.llm.LLMEnv 屬性) (torchrl.envs.llm.LLMHashingEnv 屬性) (torchrl.envs.llm.MLGymWrapper 屬性) (torchrl.envs.LLMHashingEnv 屬性) (torchrl.envs.ParallelEnv 屬性) (torchrl.envs.PendulumEnv 屬性) (torchrl.envs.ProcessorAsyncEnvPool 屬性) (torchrl.envs.SerialEnv 屬性) (torchrl.envs.ThreadingAsyncEnvPool 屬性) (torchrl.envs.TicTacToeEnv 屬性) (torchrl.envs.transforms.TransformedEnv 屬性) BatchedEnvConfig (torchrl.trainers.algorithms.configs.envs 中的類) batching (torchrl.modules.llm.LLMWrapperBase 屬性) (torchrl.modules.llm.RemoteTransformersWrapper 屬性) (torchrl.modules.llm.TransformersWrapper 屬性) (torchrl.modules.llm.vLLMWrapper 屬性) BatchRenorm1d (torchrl.modules 中的類) BatchSizeTransform (torchrl.envs.transforms 中的類) BatchSizeTransformConfig (torchrl.trainers.algorithms.configs.transforms 中的類) BatchSubSampler (torchrl.trainers 中的類) bfloat16() (torchrl.data.BinaryToDecimal 方法) (torchrl.data.DensifyReward 方法) (torchrl.data.HashToInt 方法) (torchrl.data.llm.TopKRewardSelector 方法) (torchrl.data.MultiStep 方法) (torchrl.data.QueryModule 方法) (torchrl.data.RandomProjectionHash 方法) (torchrl.data.SipHash 方法) (torchrl.data.TensorDictMap 方法) (torchrl.envs.AsyncEnvPool 方法) (torchrl.envs.ChessEnv 方法) (torchrl.envs.EnvBase 方法) (torchrl.envs.GymLikeEnv 方法) (torchrl.envs.llm.ChatEnv 方法) (torchrl.envs.llm.DatasetChatEnv 方法) (torchrl.envs.llm.GSM8KEnv 方法) (torchrl.envs.llm.GSM8KPrepareQuestion 方法) (torchrl.envs.llm.GSM8KRewardParser 方法) (torchrl.envs.llm.IFEvalEnv 方法) (torchrl.envs.llm.IfEvalScorer 方法) (torchrl.envs.llm.LLMEnv 方法) (torchrl.envs.llm.LLMHashingEnv 方法) (torchrl.envs.llm.MLGymWrapper 方法) (torchrl.envs.llm.transforms.AddThinkingPrompt 方法) (torchrl.envs.llm.transforms.BrowserTransform 方法) (torchrl.envs.llm.transforms.DataLoadingPrimer 方法) (torchrl.envs.llm.transforms.KLComputation 方法) (torchrl.envs.llm.transforms.KLRewardTransform 方法) (torchrl.envs.llm.transforms.MCPToolTransform 方法) (torchrl.envs.llm.transforms.PolicyVersion 方法) (torchrl.envs.llm.transforms.PythonInterpreter 方法) (torchrl.envs.llm.transforms.RayDataLoadingPrimer 方法) (torchrl.envs.llm.transforms.RetrieveKL 方法) (torchrl.envs.llm.transforms.RetrieveLogProb 方法) (torchrl.envs.llm.transforms.TemplateTransform 方法) (torchrl.envs.llm.transforms.Tokenizer 方法) (torchrl.envs.LLMHashingEnv 方法) (torchrl.envs.ParallelEnv 方法) (torchrl.envs.PendulumEnv 方法) (torchrl.envs.ProcessorAsyncEnvPool 方法) (torchrl.envs.SerialEnv 方法) (torchrl.envs.ThreadingAsyncEnvPool 方法) (torchrl.envs.TicTacToeEnv 方法) (torchrl.envs.transforms.rb_transforms.MultiStepTransform 方法) (torchrl.modules.llm.LLMWrapperBase 方法) (torchrl.modules.llm.TransformersWrapper 方法) (torchrl.modules.llm.vLLMWrapper 方法) (torchrl.objectives.llm.GRPOLoss 方法) (torchrl.objectives.llm.MCAdvantage 方法) (torchrl.objectives.llm.SFTLoss 方法) biased_softplus (torchrl.modules.utils 中的類) BinarizeReward (torchrl.envs.transforms 中的類) BinarizeRewardConfig (torchrl.trainers.algorithms.configs.transforms 中的類) Binary (torchrl.data 中的類) BinaryDiscreteTensorSpec (torchrl.data 中的類) BinaryToDecimal (torchrl.data 中的類) Bounded (torchrl.data 中的類) BoundedTensorSpec (torchrl.data 中的類) branching_action (torchrl.data.Tree 屬性) BraxEnv() (在 torchrl.envs 模組中) BraxEnvConfig (torchrl.trainers.algorithms.configs.envs_libs 中的類) BraxWrapper() (在 torchrl.envs 模組中) BrowserTransform (torchrl.envs.llm.transforms 中的類) buffers() (torchrl.data.BinaryToDecimal 方法) (torchrl.data.DensifyReward 方法) (torchrl.data.HashToInt 方法) (torchrl.data.llm.TopKRewardSelector 方法) (torchrl.data.MultiStep 方法) (torchrl.data.QueryModule 方法) (torchrl.data.RandomProjectionHash 方法) (torchrl.data.SipHash 方法) (torchrl.data.TensorDictMap 方法) (torchrl.envs.AsyncEnvPool 方法) (torchrl.envs.ChessEnv 方法) (torchrl.envs.EnvBase 方法) (torchrl.envs.GymLikeEnv 方法) (torchrl.envs.llm.ChatEnv 方法) (torchrl.envs.llm.DatasetChatEnv 方法) (torchrl.envs.llm.GSM8KEnv 方法) (torchrl.envs.llm.GSM8KPrepareQuestion 方法) (torchrl.envs.llm.GSM8KRewardParser 方法) (torchrl.envs.llm.IFEvalEnv 方法) (torchrl.envs.llm.IfEvalScorer 方法) (torchrl.envs.llm.LLMEnv 方法) (torchrl.envs.llm.LLMHashingEnv 方法) (torchrl.envs.llm.MLGymWrapper 方法) (torchrl.envs.llm.transforms.AddThinkingPrompt 方法) (torchrl.envs.llm.transforms.BrowserTransform 方法) (torchrl.envs.llm.transforms.DataLoadingPrimer 方法) (torchrl.envs.llm.transforms.KLComputation 方法) (torchrl.envs.llm.transforms.KLRewardTransform 方法) (torchrl.envs.llm.transforms.MCPToolTransform 方法) (torchrl.envs.llm.transforms.PolicyVersion 方法) (torchrl.envs.llm.transforms.PythonInterpreter 方法) (torchrl.envs.llm.transforms.RayDataLoadingPrimer 方法) (torchrl.envs.llm.transforms.RetrieveKL 方法) (torchrl.envs.llm.transforms.RetrieveLogProb 方法) (torchrl.envs.llm.transforms.TemplateTransform 方法) (torchrl.envs.llm.transforms.Tokenizer 方法) (torchrl.envs.LLMHashingEnv 方法) (torchrl.envs.ParallelEnv 方法) (torchrl.envs.PendulumEnv 方法) (torchrl.envs.ProcessorAsyncEnvPool 方法) (torchrl.envs.SerialEnv 方法) (torchrl.envs.ThreadingAsyncEnvPool 方法) (torchrl.envs.TicTacToeEnv 方法) (torchrl.envs.transforms.rb_transforms.MultiStepTransform 方法) (torchrl.modules.llm.LLMWrapperBase 方法) (torchrl.modules.llm.TransformersWrapper 方法) (torchrl.modules.llm.vLLMWrapper 方法) (torchrl.objectives.llm.GRPOLoss 方法) (torchrl.objectives.llm.MCAdvantage 方法) (torchrl.objectives.llm.SFTLoss 方法) build_td_for_shared_vecnorm() (torchrl.envs.transforms.VecNorm 靜態方法) BurnInTransform (torchrl.envs.transforms 中的類) BurnInTransformConfig (torchrl.trainers.algorithms.configs.transforms 中的類) bytes() (torchrl.data.replay_buffers.CompressedListStorage 方法) C cardinality() (torchrl.data.Binary 方法) (torchrl.data.BinaryDiscreteTensorSpec 方法) (torchrl.data.Bounded 方法) (torchrl.data.BoundedTensorSpec 方法) (torchrl.data.Categorical 方法) (torchrl.data.Composite 方法) (torchrl.data.CompositeSpec 方法) (torchrl.data.DiscreteTensorSpec 方法) (torchrl.data.LazyStackedCompositeSpec 方法) (torchrl.data.LazyStackedTensorSpec 方法) (torchrl.data.MultiCategorical 方法) (torchrl.data.MultiDiscreteTensorSpec 方法) (torchrl.data.MultiOneHot 方法) (torchrl.data.MultiOneHotDiscreteTensorSpec 方法) (torchrl.data.NonTensor 方法) (torchrl.data.NonTensorSpec 方法) (torchrl.data.OneHot 方法) (torchrl.data.OneHotDiscreteTensorSpec 方法) (torchrl.data.Stacked 方法) (torchrl.data.StackedComposite 方法) (torchrl.data.TensorSpec 方法) (torchrl.data.Unbounded 方法) (torchrl.data.UnboundedContinuous 方法) (torchrl.data.UnboundedContinuousTensorSpec 方法) (torchrl.data.UnboundedDiscrete 方法) (torchrl.data.UnboundedDiscreteTensorSpec 方法) (torchrl.envs.AsyncEnvPool 方法) (torchrl.envs.ChessEnv 方法) (torchrl.envs.EnvBase 方法) (torchrl.envs.GymLikeEnv 方法) (torchrl.envs.llm.ChatEnv 方法) (torchrl.envs.llm.DatasetChatEnv 方法) (torchrl.envs.llm.GSM8KEnv 方法) (torchrl.envs.llm.IFEvalEnv 方法) (torchrl.envs.llm.LLMEnv 方法) (torchrl.envs.llm.LLMHashingEnv 方法) (torchrl.envs.llm.MLGymWrapper 方法) (torchrl.envs.LLMHashingEnv 方法) (torchrl.envs.ParallelEnv 方法) (torchrl.envs.PendulumEnv 方法) (torchrl.envs.ProcessorAsyncEnvPool 方法) (torchrl.envs.SerialEnv 方法) (torchrl.envs.ThreadingAsyncEnvPool 方法) (torchrl.envs.TicTacToeEnv 方法) cat() (torchrl.data.llm.ContentBase 方法) (torchrl.data.llm.History 方法) (torchrl.data.Tree 方法) (torchrl.envs.llm.IFEvalScoreData 方法) (torchrl.modules.llm.ChatHistory 方法) (torchrl.modules.llm.LogProbs 方法) (torchrl.modules.llm.Masks 方法) (torchrl.modules.llm.Text 方法) (torchrl.modules.llm.Tokens 方法) (torchrl.objectives.llm.GRPOLossOutput 方法) (torchrl.objectives.llm.SFTLossOutput 方法) Categorical (torchrl.data 中的類) CatFrames (torchrl.envs.transforms 中的類) CatFramesConfig (torchrl.trainers.algorithms.configs.transforms 中的類) CatTensors (torchrl.envs.transforms 中的類) CatTensorsConfig (torchrl.trainers.algorithms.configs.transforms 中的類) CEMPlanner (torchrl.modules 中的類) CenterCrop (torchrl.envs.transforms 中的類) CenterCropConfig (torchrl.trainers.algorithms.configs.transforms 中的類) ChatEnv (torchrl.envs.llm 中的類) ChatHistory (torchrl.modules.llm 中的類) check_env_specs() (在 torchrl.envs 模組中) (torchrl.envs.AsyncEnvPool 方法) (torchrl.envs.ChessEnv 方法) (torchrl.envs.EnvBase 方法) (torchrl.envs.GymLikeEnv 方法) (torchrl.envs.llm.ChatEnv 方法) (torchrl.envs.llm.DatasetChatEnv 方法) (torchrl.envs.llm.GSM8KEnv 方法) (torchrl.envs.llm.IFEvalEnv 方法) (torchrl.envs.llm.LLMEnv 方法) (torchrl.envs.llm.LLMHashingEnv 方法) (torchrl.envs.llm.MLGymWrapper 方法) (torchrl.envs.LLMHashingEnv 方法) (torchrl.envs.ParallelEnv 方法) (torchrl.envs.PendulumEnv 方法) (torchrl.envs.ProcessorAsyncEnvPool 方法) (torchrl.envs.SerialEnv 方法) (torchrl.envs.ThreadingAsyncEnvPool 方法) (torchrl.envs.TicTacToeEnv 方法) check_marl_grouping() (在 torchrl.envs 模組中) check_no_exclusive_keys (torchrl.data 中的類) ChessEnv (torchrl.envs 中的類) children() (torchrl.data.BinaryToDecimal 方法) (torchrl.data.DensifyReward 方法) (torchrl.data.HashToInt 方法) (torchrl.data.llm.TopKRewardSelector 方法) (torchrl.data.MultiStep 方法) (torchrl.data.QueryModule 方法) (torchrl.data.RandomProjectionHash 方法) (torchrl.data.SipHash 方法) (torchrl.data.TensorDictMap 方法) (torchrl.envs.AsyncEnvPool 方法) (torchrl.envs.ChessEnv 方法) (torchrl.envs.EnvBase 方法) (torchrl.envs.GymLikeEnv 方法) (torchrl.envs.llm.ChatEnv 方法) (torchrl.envs.llm.DatasetChatEnv 方法) (torchrl.envs.llm.GSM8KEnv 方法) (torchrl.envs.llm.GSM8KPrepareQuestion 方法) (torchrl.envs.llm.GSM8KRewardParser 方法) (torchrl.envs.llm.IFEvalEnv 方法) (torchrl.envs.llm.IfEvalScorer 方法) (torchrl.envs.llm.LLMEnv 方法) (torchrl.envs.llm.LLMHashingEnv 方法) (torchrl.envs.llm.MLGymWrapper 方法) (torchrl.envs.llm.transforms.AddThinkingPrompt 方法) (torchrl.envs.llm.transforms.BrowserTransform 方法) (torchrl.envs.llm.transforms.DataLoadingPrimer 方法) (torchrl.envs.llm.transforms.KLComputation 方法) (torchrl.envs.llm.transforms.KLRewardTransform 方法) (torchrl.envs.llm.transforms.MCPToolTransform 方法) (torchrl.envs.llm.transforms.PolicyVersion 方法) (torchrl.envs.llm.transforms.PythonInterpreter 方法) (torchrl.envs.llm.transforms.RayDataLoadingPrimer 方法) (torchrl.envs.llm.transforms.RetrieveKL 方法) (torchrl.envs.llm.transforms.RetrieveLogProb 方法) (torchrl.envs.llm.transforms.TemplateTransform 方法) (torchrl.envs.llm.transforms.Tokenizer 方法) (torchrl.envs.LLMHashingEnv 方法) (torchrl.envs.ParallelEnv 方法) (torchrl.envs.PendulumEnv 方法) (torchrl.envs.ProcessorAsyncEnvPool 方法) (torchrl.envs.SerialEnv 方法) (torchrl.envs.ThreadingAsyncEnvPool 方法) (torchrl.envs.TicTacToeEnv 方法) (torchrl.envs.transforms.rb_transforms.MultiStepTransform 方法) (torchrl.modules.llm.LLMWrapperBase 方法) (torchrl.modules.llm.TransformersWrapper 方法) (torchrl.modules.llm.vLLMWrapper 方法) (torchrl.objectives.llm.GRPOLoss 方法) (torchrl.objectives.llm.MCAdvantage 方法) (torchrl.objectives.llm.SFTLoss 方法) cleanup_batching() (torchrl.modules.llm.LLMWrapperBase 方法) (torchrl.modules.llm.RemoteTransformersWrapper 方法) (torchrl.modules.llm.TransformersWrapper 方法) (torchrl.modules.llm.vLLMWrapper 方法) clear_cache() (torchrl.modules.LLMMaskedCategorical 方法) clear_device_() (torchrl.data.Binary 方法) (torchrl.data.BinaryDiscreteTensorSpec 方法) (torchrl.data.Bounded 方法) (torchrl.data.BoundedTensorSpec 方法) (torchrl.data.Categorical 方法) (torchrl.data.Composite 方法) (torchrl.data.CompositeSpec 方法) (torchrl.data.DiscreteTensorSpec 方法) (torchrl.data.LazyStackedCompositeSpec 方法) (torchrl.data.LazyStackedTensorSpec 方法) (torchrl.data.MultiCategorical 方法) (torchrl.data.MultiDiscreteTensorSpec 方法) (torchrl.data.MultiOneHot 方法) (torchrl.data.MultiOneHotDiscreteTensorSpec 方法) (torchrl.data.NonTensor 方法) (torchrl.data.NonTensorSpec 方法) (torchrl.data.OneHot 方法) (torchrl.data.OneHotDiscreteTensorSpec 方法) (torchrl.data.Stacked 方法) (torchrl.data.StackedComposite 方法) (torchrl.data.TensorSpec 方法) (torchrl.data.Unbounded 方法) (torchrl.data.UnboundedContinuous 方法) (torchrl.data.UnboundedContinuousTensorSpec 方法) (torchrl.data.UnboundedDiscrete 方法) (torchrl.data.UnboundedDiscreteTensorSpec 方法) ClearCudaCache (torchrl.trainers 中的類) ClipPPOLoss (torchrl.objectives 中的類) ClipTransform (torchrl.envs.transforms 中的類) ClipTransformConfig (torchrl.trainers.algorithms.configs.transforms 中的類) clone() (torchrl.data.Binary 方法) (torchrl.data.BinaryDiscreteTensorSpec 方法) (torchrl.data.Bounded 方法) (torchrl.data.BoundedTensorSpec 方法) (torchrl.data.Categorical 方法) (torchrl.data.Composite 方法) (torchrl.data.CompositeSpec 方法) (torchrl.data.DiscreteTensorSpec 方法) (torchrl.data.LazyStackedCompositeSpec 方法) (torchrl.data.LazyStackedTensorSpec 方法) (torchrl.data.MultiCategorical 方法) (torchrl.data.MultiDiscreteTensorSpec 方法) (torchrl.data.MultiOneHot 方法) (torchrl.data.MultiOneHotDiscreteTensorSpec 方法) (torchrl.data.NonTensor 方法) (torchrl.data.NonTensorSpec 方法) (torchrl.data.OneHot 方法) (torchrl.data.OneHotDiscreteTensorSpec 方法) (torchrl.data.Stacked 方法) (torchrl.data.StackedComposite 方法) (torchrl.data.TensorSpec 方法) (torchrl.data.Unbounded 方法) (torchrl.data.UnboundedContinuous 方法) (torchrl.data.UnboundedContinuousTensorSpec 方法) (torchrl.data.UnboundedDiscrete 方法) (torchrl.data.UnboundedDiscreteTensorSpec 方法) (torchrl.envs.llm.transforms.BrowserTransform 方法) (torchrl.envs.llm.transforms.PythonInterpreter 方法) (torchrl.envs.llm.transforms.RayDataLoadingPrimer 方法) (torchrl.envs.transforms.Transform 方法) (torchrl.envs.transforms.VecNormV2 方法) close() (torchrl.data.llm.TopKRewardSelector 方法) (torchrl.data.RayReplayBuffer 方法) (torchrl.envs.GymLikeEnv 方法) (torchrl.envs.llm.GSM8KPrepareQuestion 方法) (torchrl.envs.llm.GSM8KRewardParser 方法) (torchrl.envs.llm.IfEvalScorer 方法) (torchrl.envs.llm.MLGymWrapper 方法) (torchrl.envs.llm.transforms.AddThinkingPrompt 方法) (torchrl.envs.llm.transforms.BrowserTransform 方法) (torchrl.envs.llm.transforms.DataLoadingPrimer 方法) (torchrl.envs.llm.transforms.KLComputation 方法) (torchrl.envs.llm.transforms.KLRewardTransform 方法) (torchrl.envs.llm.transforms.MCPToolTransform 方法) (torchrl.envs.llm.transforms.PolicyVersion 方法) (torchrl.envs.llm.transforms.PythonInterpreter 方法) (torchrl.envs.llm.transforms.RayDataLoadingPrimer 方法) (torchrl.envs.llm.transforms.RetrieveKL 方法) (torchrl.envs.llm.transforms.RetrieveLogProb 方法) (torchrl.envs.llm.transforms.TemplateTransform 方法) (torchrl.envs.llm.transforms.Tokenizer 方法) (torchrl.envs.transforms.Compose 方法) (torchrl.envs.transforms.rb_transforms.MultiStepTransform 方法) (torchrl.envs.transforms.Transform 方法) (torchrl.objectives.llm.MCAdvantage 方法) collective_rpc() (torchrl.modules.llm.AsyncVLLM 方法) collector (torchrl.collectors.distributed.DistributedWeightUpdater 屬性) (torchrl.collectors.distributed.RPCWeightUpdater 屬性) (torchrl.collectors.llm.vLLMUpdater 屬性) (torchrl.collectors.llm.vLLMUpdaterV2 屬性) (torchrl.collectors.MultiProcessedWeightUpdater 屬性) (torchrl.collectors.RayWeightUpdater 屬性) (torchrl.collectors.VanillaWeightUpdater 屬性) (torchrl.collectors.WeightUpdaterBase 屬性) (torchrl.data.llm.TopKRewardSelector 屬性) (torchrl.envs.AsyncEnvPool 屬性) (torchrl.envs.ChessEnv 屬性) (torchrl.envs.EnvBase 屬性) (torchrl.envs.GymLikeEnv 屬性) (torchrl.envs.llm.ChatEnv 屬性) (torchrl.envs.llm.DatasetChatEnv 屬性) (torchrl.envs.llm.GSM8KEnv 屬性) (torchrl.envs.llm.GSM8KPrepareQuestion 屬性) (torchrl.envs.llm.GSM8KRewardParser 屬性) (torchrl.envs.llm.IFEvalEnv 屬性) (torchrl.envs.llm.IfEvalScorer 屬性) (torchrl.envs.llm.LLMEnv 屬性) (torchrl.envs.llm.LLMHashingEnv 屬性) (torchrl.envs.llm.MLGymWrapper 屬性) (torchrl.envs.llm.transforms.AddThinkingPrompt 屬性) (torchrl.envs.llm.transforms.BrowserTransform 屬性) (torchrl.envs.llm.transforms.DataLoadingPrimer 屬性) (torchrl.envs.llm.transforms.KLComputation 屬性) (torchrl.envs.llm.transforms.KLRewardTransform 屬性) (torchrl.envs.llm.transforms.MCPToolTransform 屬性) (torchrl.envs.llm.transforms.PolicyVersion 屬性) (torchrl.envs.llm.transforms.PythonInterpreter 屬性) (torchrl.envs.llm.transforms.RayDataLoadingPrimer 屬性) (torchrl.envs.llm.transforms.RetrieveKL 屬性) (torchrl.envs.llm.transforms.RetrieveLogProb 屬性) (torchrl.envs.llm.transforms.TemplateTransform 屬性) (torchrl.envs.llm.transforms.Tokenizer 屬性) (torchrl.envs.LLMHashingEnv 屬性) (torchrl.envs.ParallelEnv 屬性) (torchrl.envs.PendulumEnv 屬性) (torchrl.envs.ProcessorAsyncEnvPool 屬性) (torchrl.envs.SerialEnv 屬性) (torchrl.envs.ThreadingAsyncEnvPool 屬性) (torchrl.envs.TicTacToeEnv 屬性) (torchrl.envs.transforms.rb_transforms.MultiStepTransform 屬性) (torchrl.envs.transforms.Transform 屬性) (torchrl.modules.llm.LLMWrapperBase 屬性) (torchrl.modules.llm.RemoteTransformersWrapper 屬性) (torchrl.modules.llm.TransformersWrapper 屬性) (torchrl.modules.llm.vLLMWrapper 屬性) (torchrl.objectives.llm.MCAdvantage 屬性) collectors (torchrl.collectors.distributed.DistributedWeightUpdater 屬性) (torchrl.collectors.distributed.RPCWeightUpdater 屬性) (torchrl.collectors.llm.vLLMUpdater 屬性) (torchrl.collectors.llm.vLLMUpdaterV2 屬性) (torchrl.collectors.MultiProcessedWeightUpdater 屬性) (torchrl.collectors.RayWeightUpdater 屬性) (torchrl.collectors.VanillaWeightUpdater 屬性) (torchrl.collectors.WeightUpdaterBase 屬性) compile() (torchrl.data.BinaryToDecimal 方法) (torchrl.data.DensifyReward 方法) (torchrl.data.HashToInt 方法) (torchrl.data.llm.TopKRewardSelector 方法) (torchrl.data.MultiStep 方法) (torchrl.data.QueryModule 方法) (torchrl.data.RandomProjectionHash 方法) (torchrl.data.SipHash 方法) (torchrl.data.TensorDictMap 方法) (torchrl.envs.AsyncEnvPool 方法) (torchrl.envs.ChessEnv 方法) (torchrl.envs.EnvBase 方法) (torchrl.envs.GymLikeEnv 方法) (torchrl.envs.llm.ChatEnv 方法) (torchrl.envs.llm.DatasetChatEnv 方法) (torchrl.envs.llm.GSM8KEnv 方法) (torchrl.envs.llm.GSM8KPrepareQuestion 方法) (torchrl.envs.llm.GSM8KRewardParser 方法) (torchrl.envs.llm.IFEvalEnv 方法) (torchrl.envs.llm.IfEvalScorer 方法) (torchrl.envs.llm.LLMEnv 方法) (torchrl.envs.llm.LLMHashingEnv 方法) (torchrl.envs.llm.MLGymWrapper 方法) (torchrl.envs.llm.transforms.AddThinkingPrompt 方法) (torchrl.envs.llm.transforms.BrowserTransform 方法) (torchrl.envs.llm.transforms.DataLoadingPrimer 方法) (torchrl.envs.llm.transforms.KLComputation 方法) (torchrl.envs.llm.transforms.KLRewardTransform 方法) (torchrl.envs.llm.transforms.MCPToolTransform 方法) (torchrl.envs.llm.transforms.PolicyVersion 方法) (torchrl.envs.llm.transforms.PythonInterpreter 方法) (torchrl.envs.llm.transforms.RayDataLoadingPrimer 方法) (torchrl.envs.llm.transforms.RetrieveKL 方法) (torchrl.envs.llm.transforms.RetrieveLogProb 方法) (torchrl.envs.llm.transforms.TemplateTransform 方法) (torchrl.envs.llm.transforms.Tokenizer 方法) (torchrl.envs.LLMHashingEnv 方法) (torchrl.envs.ParallelEnv 方法) (torchrl.envs.PendulumEnv 方法) (torchrl.envs.ProcessorAsyncEnvPool 方法) (torchrl.envs.SerialEnv 方法) (torchrl.envs.ThreadingAsyncEnvPool 方法) (torchrl.envs.TicTacToeEnv 方法) (torchrl.envs.transforms.rb_transforms.MultiStepTransform 方法) (torchrl.modules.llm.LLMWrapperBase 方法) (torchrl.modules.llm.TransformersWrapper 方法) (torchrl.modules.llm.vLLMWrapper 方法) (torchrl.objectives.llm.GRPOLoss 方法) (torchrl.objectives.llm.MCAdvantage 方法) (torchrl.objectives.llm.SFTLoss 方法) Compose (torchrl.envs.transforms 中的類) ComposeConfig (torchrl.trainers.algorithms.configs.transforms 中的類) Composite (torchrl.data 中的類) CompositeSpec (torchrl.data 中的類) CompressedListStorage (torchrl.data.replay_buffers 中的類) CompressedListStorageCheckpointer (torchrl.data.replay_buffers 中的類) ConditionalPolicySwitch (torchrl.envs.transforms 中的類) ConditionalPolicySwitchConfig (torchrl.trainers.algorithms.configs.transforms 中的類) ConditionalSkip (torchrl.envs.transforms 中的類) ConditionalSkipConfig (torchrl.trainers.algorithms.configs.transforms 中的類) ConfigBase (torchrl.trainers.algorithms.configs.common 中的類) ConsistentDropout (torchrl.modules 中的類) ConsistentDropoutModule (torchrl.modules 中的類) consolidate_spec (torchrl.data 中的類) ConstantKLController (torchrl.data 中的類) container (torchrl.data.llm.TopKRewardSelector 屬性) (torchrl.envs.llm.GSM8KPrepareQuestion 屬性) (torchrl.envs.llm.GSM8KRewardParser 屬性) (torchrl.envs.llm.IfEvalScorer 屬性) (torchrl.envs.llm.transforms.AddThinkingPrompt 屬性) (torchrl.envs.llm.transforms.BrowserTransform 屬性) (torchrl.envs.llm.transforms.DataLoadingPrimer 屬性) (torchrl.envs.llm.transforms.KLComputation 屬性) (torchrl.envs.llm.transforms.KLRewardTransform 屬性) (torchrl.envs.llm.transforms.MCPToolTransform 屬性) (torchrl.envs.llm.transforms.PolicyVersion 屬性) (torchrl.envs.llm.transforms.PythonInterpreter 屬性) (torchrl.envs.llm.transforms.RayDataLoadingPrimer 屬性) (torchrl.envs.llm.transforms.RetrieveKL 屬性) (torchrl.envs.llm.transforms.RetrieveLogProb 屬性) (torchrl.envs.llm.transforms.TemplateTransform 屬性) (torchrl.envs.llm.transforms.Tokenizer 屬性) (torchrl.envs.transforms.rb_transforms.MultiStepTransform 屬性) (torchrl.envs.transforms.Transform 屬性) (torchrl.objectives.llm.MCAdvantage 屬性) contains() (torchrl.data.Binary 方法) (torchrl.data.BinaryDiscreteTensorSpec 方法) (torchrl.data.Bounded 方法) (torchrl.data.BoundedTensorSpec 方法) (torchrl.data.Categorical 方法) (torchrl.data.Composite 方法) (torchrl.data.CompositeSpec 方法) (torchrl.data.DiscreteTensorSpec 方法) (torchrl.data.LazyStackedCompositeSpec 方法) (torchrl.data.LazyStackedTensorSpec 方法) (torchrl.data.MultiCategorical 方法) (torchrl.data.MultiDiscreteTensorSpec 方法) (torchrl.data.MultiOneHot 方法) (torchrl.data.MultiOneHotDiscreteTensorSpec 方法) (torchrl.data.NonTensor 方法) (torchrl.data.NonTensorSpec 方法) (torchrl.data.OneHot 方法) (torchrl.data.OneHotDiscreteTensorSpec 方法) (torchrl.data.Stacked 方法) (torchrl.data.StackedComposite 方法) (torchrl.data.TensorSpec 方法) (torchrl.data.Unbounded 方法) (torchrl.data.UnboundedContinuous 方法) (torchrl.data.UnboundedContinuousTensorSpec 方法) (torchrl.data.UnboundedDiscrete 方法) (torchrl.data.UnboundedDiscreteTensorSpec 方法) contains_lazy_spec (torchrl.data 中的類) ContentBase (torchrl.data.llm 中的類) Conv3dNet (torchrl.modules 中的類) convert_to_functional() (torchrl.objectives.llm.GRPOLoss 方法) (torchrl.objectives.llm.SFTLoss 方法) (torchrl.objectives.LossModule 方法) ConvNet (torchrl.modules 中的類) ConvNetConfig (torchrl.trainers.algorithms.configs.modules 中的類) correct_for_frame_skip() (在 torchrl.trainers.helpers 模組中) CountFramesLog (torchrl.trainers 中的類) cpu() (torchrl.data.Binary 方法) (torchrl.data.BinaryDiscreteTensorSpec 方法) (torchrl.data.BinaryToDecimal 方法) (torchrl.data.Bounded 方法) (torchrl.data.BoundedTensorSpec 方法) (torchrl.data.Categorical 方法) (torchrl.data.Composite 方法) (torchrl.data.CompositeSpec 方法) (torchrl.data.DensifyReward 方法) (torchrl.data.DiscreteTensorSpec 方法) (torchrl.data.HashToInt 方法) (torchrl.data.LazyStackedCompositeSpec 方法) (torchrl.data.LazyStackedTensorSpec 方法) (torchrl.data.llm.TopKRewardSelector 方法) (torchrl.data.MultiCategorical 方法) (torchrl.data.MultiDiscreteTensorSpec 方法) (torchrl.data.MultiOneHot 方法) (torchrl.data.MultiOneHotDiscreteTensorSpec 方法) (torchrl.data.MultiStep 方法) (torchrl.data.NonTensor 方法) (torchrl.data.NonTensorSpec 方法) (torchrl.data.OneHot 方法) (torchrl.data.OneHotDiscreteTensorSpec 方法) (torchrl.data.QueryModule 方法) (torchrl.data.RandomProjectionHash 方法) (torchrl.data.SipHash 方法) (torchrl.data.Stacked 方法) (torchrl.data.StackedComposite 方法) (torchrl.data.TensorDictMap 方法) (torchrl.data.TensorSpec 方法) (torchrl.data.Unbounded 方法) (torchrl.data.UnboundedContinuous 方法) (torchrl.data.UnboundedContinuousTensorSpec 方法) (torchrl.data.UnboundedDiscrete 方法) (torchrl.data.UnboundedDiscreteTensorSpec 方法) (torchrl.envs.AsyncEnvPool 方法) (torchrl.envs.ChessEnv 方法) (torchrl.envs.EnvBase 方法) (torchrl.envs.GymLikeEnv 方法) (torchrl.envs.llm.ChatEnv 方法) (torchrl.envs.llm.DatasetChatEnv 方法) (torchrl.envs.llm.GSM8KEnv 方法) (torchrl.envs.llm.GSM8KPrepareQuestion 方法) (torchrl.envs.llm.GSM8KRewardParser 方法) (torchrl.envs.llm.IFEvalEnv 方法) (torchrl.envs.llm.IfEvalScorer 方法) (torchrl.envs.llm.LLMEnv 方法) (torchrl.envs.llm.LLMHashingEnv 方法) (torchrl.envs.llm.MLGymWrapper 方法) (torchrl.envs.llm.transforms.AddThinkingPrompt 方法) (torchrl.envs.llm.transforms.BrowserTransform 方法) (torchrl.envs.llm.transforms.DataLoadingPrimer 方法) (torchrl.envs.llm.transforms.KLComputation 方法) (torchrl.envs.llm.transforms.KLRewardTransform 方法) (torchrl.envs.llm.transforms.MCPToolTransform 方法) (torchrl.envs.llm.transforms.PolicyVersion 方法) (torchrl.envs.llm.transforms.PythonInterpreter 方法) (torchrl.envs.llm.transforms.RayDataLoadingPrimer 方法) (torchrl.envs.llm.transforms.RetrieveKL 方法) (torchrl.envs.llm.transforms.RetrieveLogProb 方法) (torchrl.envs.llm.transforms.TemplateTransform 方法) (torchrl.envs.llm.transforms.Tokenizer 方法) (torchrl.envs.LLMHashingEnv 方法) (torchrl.envs.ParallelEnv 方法) (torchrl.envs.PendulumEnv 方法) (torchrl.envs.ProcessorAsyncEnvPool 方法) (torchrl.envs.SerialEnv 方法) (torchrl.envs.ThreadingAsyncEnvPool 方法) (torchrl.envs.TicTacToeEnv 方法) (torchrl.envs.transforms.rb_transforms.MultiStepTransform 方法) (torchrl.modules.llm.LLMWrapperBase 方法) (torchrl.modules.llm.TransformersWrapper 方法) (torchrl.modules.llm.vLLMWrapper 方法) (torchrl.objectives.llm.GRPOLoss 方法) (torchrl.objectives.llm.MCAdvantage 方法) (torchrl.objectives.llm.SFTLoss 方法) CQLLoss (torchrl.objectives 中的類) create_infinite_iterator (torchrl.data 中的類) create_load_balancer() (torchrl.modules.llm.AsyncVLLM 方法) create_rollout_td() (torchrl.data.RolloutFromModel 方法) Crop (torchrl.envs.transforms 中的類) CropConfig (torchrl.trainers.algorithms.configs.transforms 中的類) CrossQLoss (torchrl.objectives 中的類) CSVLogger() (在 torchrl.record.loggers.csv 模組中) CSVLoggerConfig (torchrl.trainers.algorithms.configs.logging 中的類) cuda() (torchrl.data.Binary 方法) (torchrl.data.BinaryDiscreteTensorSpec 方法) (torchrl.data.BinaryToDecimal 方法) (torchrl.data.Bounded 方法) (torchrl.data.BoundedTensorSpec 方法) (torchrl.data.Categorical 方法) (torchrl.data.Composite 方法) (torchrl.data.CompositeSpec 方法) (torchrl.data.DensifyReward 方法) (torchrl.data.DiscreteTensorSpec 方法) (torchrl.data.HashToInt 方法) (torchrl.data.LazyStackedCompositeSpec 方法) (torchrl.data.LazyStackedTensorSpec 方法) (torchrl.data.llm.TopKRewardSelector 方法) (torchrl.data.MultiCategorical 方法) (torchrl.data.MultiDiscreteTensorSpec 方法) (torchrl.data.MultiOneHot 方法) (torchrl.data.MultiOneHotDiscreteTensorSpec 方法) (torchrl.data.MultiStep 方法) (torchrl.data.NonTensor 方法) (torchrl.data.NonTensorSpec 方法) (torchrl.data.OneHot 方法) (torchrl.data.OneHotDiscreteTensorSpec 方法) (torchrl.data.QueryModule 方法) (torchrl.data.RandomProjectionHash 方法) (torchrl.data.SipHash 方法) (torchrl.data.Stacked 方法) (torchrl.data.StackedComposite 方法) (torchrl.data.TensorDictMap 方法) (torchrl.data.TensorSpec 方法) (torchrl.data.Unbounded 方法) (torchrl.data.UnboundedContinuous 方法) (torchrl.data.UnboundedContinuousTensorSpec 方法) (torchrl.data.UnboundedDiscrete 方法) (torchrl.data.UnboundedDiscreteTensorSpec 方法) (torchrl.envs.AsyncEnvPool 方法) (torchrl.envs.ChessEnv 方法) (torchrl.envs.EnvBase 方法) (torchrl.envs.GymLikeEnv 方法) (torchrl.envs.llm.ChatEnv 方法) (torchrl.envs.llm.DatasetChatEnv 方法) (torchrl.envs.llm.GSM8KEnv 方法) (torchrl.envs.llm.GSM8KPrepareQuestion 方法) (torchrl.envs.llm.GSM8KRewardParser 方法) (torchrl.envs.llm.IFEvalEnv 方法) (torchrl.envs.llm.IfEvalScorer 方法) (torchrl.envs.llm.LLMEnv 方法) (torchrl.envs.llm.LLMHashingEnv 方法) (torchrl.envs.llm.MLGymWrapper 方法) (torchrl.envs.llm.transforms.AddThinkingPrompt 方法) (torchrl.envs.llm.transforms.BrowserTransform 方法) (torchrl.envs.llm.transforms.DataLoadingPrimer 方法) (torchrl.envs.llm.transforms.KLComputation 方法) (torchrl.envs.llm.transforms.KLRewardTransform 方法) (torchrl.envs.llm.transforms.MCPToolTransform 方法) (torchrl.envs.llm.transforms.PolicyVersion 方法) (torchrl.envs.llm.transforms.PythonInterpreter 方法) (torchrl.envs.llm.transforms.RayDataLoadingPrimer 方法) (torchrl.envs.llm.transforms.RetrieveKL 方法) (torchrl.envs.llm.transforms.RetrieveLogProb 方法) (torchrl.envs.llm.transforms.TemplateTransform 方法) (torchrl.envs.llm.transforms.Tokenizer 方法) (torchrl.envs.LLMHashingEnv 方法) (torchrl.envs.ParallelEnv 方法) (torchrl.envs.PendulumEnv 方法) (torchrl.envs.ProcessorAsyncEnvPool 方法) (torchrl.envs.SerialEnv 方法) (torchrl.envs.ThreadingAsyncEnvPool 方法) (torchrl.envs.TicTacToeEnv 方法) (torchrl.envs.transforms.rb_transforms.MultiStepTransform 方法) (torchrl.modules.llm.LLMWrapperBase 方法) (torchrl.modules.llm.TransformersWrapper 方法) (torchrl.modules.llm.vLLMWrapper 方法) (torchrl.objectives.llm.GRPOLoss 方法) (torchrl.objectives.llm.MCAdvantage 方法) (torchrl.objectives.llm.SFTLoss 方法) D D4RLExperienceReplay (torchrl.data.datasets 中的類) data_keys (torchrl.envs.llm.transforms.RayDataLoadingPrimer 屬性) data_path (torchrl.data.datasets.AtariDQNExperienceReplay 屬性) (torchrl.data.datasets.BaseDatasetExperienceReplay 屬性) (torchrl.data.datasets.D4RLExperienceReplay 屬性) (torchrl.data.datasets.GenDGRLExperienceReplay 屬性) (torchrl.data.datasets.MinariExperienceReplay 屬性) (torchrl.data.datasets.OpenMLExperienceReplay 屬性) (torchrl.data.datasets.OpenXExperienceReplay 屬性) (torchrl.data.datasets.RobosetExperienceReplay 屬性) (torchrl.data.datasets.VD4RLExperienceReplay 屬性) data_path_root (torchrl.data.datasets.AtariDQNExperienceReplay 屬性) (torchrl.data.datasets.BaseDatasetExperienceReplay 屬性) (torchrl.data.datasets.D4RLExperienceReplay 屬性) (torchrl.data.datasets.GenDGRLExperienceReplay 屬性) (torchrl.data.datasets.MinariExperienceReplay 屬性) (torchrl.data.datasets.OpenMLExperienceReplay 屬性) (torchrl.data.datasets.OpenXExperienceReplay 屬性) (torchrl.data.datasets.RobosetExperienceReplay 屬性) (torchrl.data.datasets.VD4RLExperienceReplay 屬性) DataCollectorBase (torchrl.collectors 中的類) DataCollectorConfig (torchrl.trainers.algorithms.configs.collectors 中的類) dataloader (torchrl.envs.llm.transforms.RayDataLoadingPrimer 屬性) DataLoadingPrimer (torchrl.envs.llm.transforms 中的類) dataset_to_tensordict() (torchrl.data.TokenizedDatasetLoader 靜態方法) DatasetChatEnv (torchrl.envs.llm 中的類) DdpgCnnActor (torchrl.modules 中的類) DdpgCnnQNet (torchrl.modules 中的類) DDPGLoss (torchrl.objectives 中的類) DdpgMlpActor (torchrl.modules 中的類) DdpgMlpQNet (torchrl.modules 中的類) DecisionTransformer (torchrl.modules 中的類) DecisionTransformer.DTConfig (torchrl.modules 中的類) DecisionTransformerInferenceWrapper (torchrl.modules.tensordict_module 中的類) default_atari_dqn() (torchrl.modules.ConvNet 類方法) default_config() (torchrl.modules.DTActor 類方法) (torchrl.modules.OnlineDTActor 類方法) default_keys (torchrl.objectives.A2CLoss 屬性) (torchrl.objectives.CQLLoss 屬性) (torchrl.objectives.CrossQLoss 屬性) (torchrl.objectives.DDPGLoss 屬性) (torchrl.objectives.DiscreteCQLLoss 屬性) (torchrl.objectives.DiscreteIQLLoss 屬性) (torchrl.objectives.DiscreteSACLoss 屬性) (torchrl.objectives.DistributionalDQNLoss 屬性) (torchrl.objectives.DQNLoss 屬性) (torchrl.objectives.DreamerActorLoss 屬性) (torchrl.objectives.DreamerModelLoss 屬性) (torchrl.objectives.DreamerValueLoss 屬性) (torchrl.objectives.DTLoss 屬性) (torchrl.objectives.GAILLoss 屬性) (torchrl.objectives.IQLLoss 屬性) (torchrl.objectives.llm.GRPOLoss 屬性) (torchrl.objectives.llm.SFTLoss 屬性) (torchrl.objectives.multiagent.QMixerLoss 屬性) (torchrl.objectives.OnlineDTLoss 屬性) (torchrl.objectives.PPOLoss 屬性) (torchrl.objectives.REDQLoss 屬性) (torchrl.objectives.ReinforceLoss 屬性) (torchrl.objectives.SACLoss 屬性) (torchrl.objectives.TD3BCLoss 屬性) (torchrl.objectives.TD3Loss 屬性) (torchrl.objectives.value.ValueEstimatorBase 屬性) default_reward_aggregator() (torchrl.envs.llm.IfEvalScorer 方法) default_spec() (torchrl.data.llm.History 類方法) (torchrl.modules.llm.ChatHistory 類方法) (torchrl.modules.llm.LogProbs 類方法) (torchrl.modules.llm.Masks 類方法) (torchrl.modules.llm.Text 類方法) (torchrl.modules.llm.Tokens 類方法) default_value_kwargs (torchrl.objectives 中的類) delete() (torchrl.data.datasets.AtariDQNExperienceReplay 方法) (torchrl.data.datasets.BaseDatasetExperienceReplay 方法) (torchrl.data.datasets.D4RLExperienceReplay 方法) (torchrl.data.datasets.GenDGRLExperienceReplay 方法) (torchrl.data.datasets.MinariExperienceReplay 方法) (torchrl.data.datasets.OpenMLExperienceReplay 方法) (torchrl.data.datasets.OpenXExperienceReplay 方法) (torchrl.data.datasets.RobosetExperienceReplay 方法) (torchrl.data.datasets.VD4RLExperienceReplay 方法) Delta (torchrl.modules 中的類) DensifyReward (torchrl.data 中的類) device (torchrl.data.Binary 屬性) (torchrl.data.BinaryDiscreteTensorSpec 屬性) (torchrl.data.Bounded 屬性) (torchrl.data.BoundedTensorSpec 屬性) (torchrl.data.Categorical 屬性) (torchrl.data.Composite 屬性) (torchrl.data.CompositeSpec 屬性) (torchrl.data.DiscreteTensorSpec 屬性) (torchrl.data.LazyStackedCompositeSpec 屬性) (torchrl.data.LazyStackedTensorSpec 屬性) (torchrl.data.llm.ContentBase 屬性) (torchrl.data.llm.History 屬性) (torchrl.data.MultiCategorical 屬性) (torchrl.data.MultiDiscreteTensorSpec 屬性) (torchrl.data.MultiOneHot 屬性) (torchrl.data.MultiOneHotDiscreteTensorSpec 屬性) (torchrl.data.NonTensor 屬性) (torchrl.data.NonTensorSpec 屬性) (torchrl.data.OneHot 屬性) (torchrl.data.OneHotDiscreteTensorSpec 屬性) (torchrl.data.PairwiseDataset 屬性) (torchrl.data.PromptData 屬性) (torchrl.data.RewardData 屬性) (torchrl.data.Stacked 屬性) (torchrl.data.StackedComposite 屬性) (torchrl.data.TensorSpec 屬性) (torchrl.data.Tree 屬性) (torchrl.data.Unbounded 屬性) (torchrl.data.UnboundedContinuous 屬性) (torchrl.data.UnboundedContinuousTensorSpec 屬性) (torchrl.data.UnboundedDiscrete 屬性) (torchrl.data.UnboundedDiscreteTensorSpec 屬性) (torchrl.envs.llm.IFEvalScoreData 屬性) (torchrl.envs.llm.transforms.RayDataLoadingPrimer 屬性) (torchrl.modules.llm.ChatHistory 屬性) (torchrl.modules.llm.LogProbs 屬性) (torchrl.modules.llm.Masks 屬性) (torchrl.modules.llm.RemoteTransformersWrapper 屬性) (torchrl.modules.llm.Text 屬性) (torchrl.modules.llm.Tokens 屬性) (torchrl.objectives.llm.GRPOLossOutput 屬性) (torchrl.objectives.llm.SFTLossOutput 屬性) DeviceCastTransform (torchrl.envs.transforms 中的類) DeviceCastTransformConfig (torchrl.trainers.algorithms.configs.transforms 中的類) dialog_turns_per_batch (torchrl.collectors.llm.LLMCollector 屬性) (torchrl.collectors.llm.RayLLMCollector 屬性) DiscreteActionProjection (torchrl.envs.transforms 中的類) DiscreteActionProjectionConfig (torchrl.trainers.algorithms.configs.transforms 中的類) DiscreteCQLLoss (torchrl.objectives 中的類) DiscreteIQLLoss (torchrl.objectives 中的類) DiscreteSACLoss (torchrl.objectives 中的類) DiscreteTensorSpec (torchrl.data 中的類) dist_params_keys (torchrl.modules.llm.RemoteTransformersWrapper 屬性) dist_sample_keys (torchrl.modules.llm.RemoteTransformersWrapper 屬性) distance_loss (torchrl.objectives 中的類) DistributedDataCollector (torchrl.collectors.distributed 中的類) DistributedSyncDataCollector (torchrl.collectors.distributed 中的類) DistributedWeightUpdater (torchrl.collectors.distributed 中的類) DistributionalDQNLoss (torchrl.objectives 中的類) DistributionalDQNnet (torchrl.modules 中的類) DistributionalQValueActor (torchrl.modules.tensordict_module 中的類) DistributionalQValueHook (torchrl.modules 中的類) DistributionalQValueModule (torchrl.modules.tensordict_module 中的類) DMControlEnv() (在 torchrl.envs 模組中) DMControlEnvConfig (torchrl.trainers.algorithms.configs.envs_libs 中的類) DMControlWrapper() (在 torchrl.envs 模組中) done_key (torchrl.envs.AsyncEnvPool 屬性) (torchrl.envs.ChessEnv 屬性) (torchrl.envs.EnvBase 屬性) (torchrl.envs.GymLikeEnv 屬性) (torchrl.envs.llm.ChatEnv 屬性) (torchrl.envs.llm.DatasetChatEnv 屬性) (torchrl.envs.llm.GSM8KEnv 屬性) (torchrl.envs.llm.IFEvalEnv 屬性) (torchrl.envs.llm.LLMEnv 屬性) (torchrl.envs.llm.LLMHashingEnv 屬性) (torchrl.envs.llm.MLGymWrapper 屬性) (torchrl.envs.LLMHashingEnv 屬性) (torchrl.envs.ParallelEnv 屬性) (torchrl.envs.PendulumEnv 屬性) (torchrl.envs.ProcessorAsyncEnvPool 屬性) (torchrl.envs.SerialEnv 屬性) (torchrl.envs.ThreadingAsyncEnvPool 屬性) (torchrl.envs.TicTacToeEnv 屬性) done_keys (torchrl.data.MCTSForest 屬性) (torchrl.envs.AsyncEnvPool 屬性) (torchrl.envs.ChessEnv 屬性) (torchrl.envs.EnvBase 屬性) (torchrl.envs.GymLikeEnv 屬性) (torchrl.envs.llm.ChatEnv 屬性) (torchrl.envs.llm.DatasetChatEnv 屬性) (torchrl.envs.llm.GSM8KEnv 屬性) (torchrl.envs.llm.IFEvalEnv 屬性) (torchrl.envs.llm.LLMEnv 屬性) (torchrl.envs.llm.LLMHashingEnv 屬性) (torchrl.envs.llm.MLGymWrapper 屬性) (torchrl.envs.LLMHashingEnv 屬性) (torchrl.envs.ParallelEnv 屬性) (torchrl.envs.PendulumEnv 屬性) (torchrl.envs.ProcessorAsyncEnvPool 屬性) (torchrl.envs.SerialEnv 屬性) (torchrl.envs.ThreadingAsyncEnvPool 屬性) (torchrl.envs.TicTacToeEnv 屬性) done_keys_groups (torchrl.envs.AsyncEnvPool 屬性) (torchrl.envs.ChessEnv 屬性) (torchrl.envs.EnvBase 屬性) (torchrl.envs.GymLikeEnv 屬性) (torchrl.envs.llm.ChatEnv 屬性) (torchrl.envs.llm.DatasetChatEnv 屬性) (torchrl.envs.llm.GSM8KEnv 屬性) (torchrl.envs.llm.IFEvalEnv 屬性) (torchrl.envs.llm.LLMEnv 屬性) (torchrl.envs.llm.LLMHashingEnv 屬性) (torchrl.envs.llm.MLGymWrapper 屬性) (torchrl.envs.LLMHashingEnv 屬性) (torchrl.envs.ParallelEnv 屬性) (torchrl.envs.PendulumEnv 屬性) (torchrl.envs.ProcessorAsyncEnvPool 屬性) (torchrl.envs.SerialEnv 屬性) (torchrl.envs.ThreadingAsyncEnvPool 屬性) (torchrl.envs.TicTacToeEnv 屬性) done_spec (torchrl.envs.AsyncEnvPool 屬性) (torchrl.envs.ChessEnv 屬性) (torchrl.envs.EnvBase 屬性) (torchrl.envs.GymLikeEnv 屬性) (torchrl.envs.llm.ChatEnv 屬性) (torchrl.envs.llm.DatasetChatEnv 屬性) (torchrl.envs.llm.GSM8KEnv 屬性) (torchrl.envs.llm.IFEvalEnv 屬性) (torchrl.envs.llm.LLMEnv 屬性) (torchrl.envs.llm.LLMHashingEnv 屬性) (torchrl.envs.llm.MLGymWrapper 屬性) (torchrl.envs.LLMHashingEnv 屬性) (torchrl.envs.ParallelEnv 屬性) (torchrl.envs.PendulumEnv 屬性) (torchrl.envs.ProcessorAsyncEnvPool 屬性) (torchrl.envs.SerialEnv 屬性) (torchrl.envs.ThreadingAsyncEnvPool 屬性) (torchrl.envs.TicTacToeEnv 屬性) done_spec_unbatched (torchrl.envs.AsyncEnvPool 屬性) (torchrl.envs.ChessEnv 屬性) (torchrl.envs.EnvBase 屬性) (torchrl.envs.GymLikeEnv 屬性) (torchrl.envs.llm.ChatEnv 屬性) (torchrl.envs.llm.DatasetChatEnv 屬性) (torchrl.envs.llm.GSM8KEnv 屬性) (torchrl.envs.llm.IFEvalEnv 屬性) (torchrl.envs.llm.LLMEnv 屬性) (torchrl.envs.llm.LLMHashingEnv 屬性) (torchrl.envs.llm.MLGymWrapper 屬性) (torchrl.envs.LLMHashingEnv 屬性) (torchrl.envs.ParallelEnv 屬性) (torchrl.envs.PendulumEnv 屬性) (torchrl.envs.ProcessorAsyncEnvPool 屬性) (torchrl.envs.SerialEnv 屬性) (torchrl.envs.ThreadingAsyncEnvPool 屬性) (torchrl.envs.TicTacToeEnv 屬性) double() (torchrl.data.BinaryToDecimal 方法) (torchrl.data.DensifyReward 方法) (torchrl.data.HashToInt 方法) (torchrl.data.llm.TopKRewardSelector 方法) (torchrl.data.MultiStep 方法) (torchrl.data.QueryModule 方法) (torchrl.data.RandomProjectionHash 方法) (torchrl.data.SipHash 方法) (torchrl.data.TensorDictMap 方法) (torchrl.envs.AsyncEnvPool 方法) (torchrl.envs.ChessEnv 方法) (torchrl.envs.EnvBase 方法) (torchrl.envs.GymLikeEnv 方法) (torchrl.envs.llm.ChatEnv 方法) (torchrl.envs.llm.DatasetChatEnv 方法) (torchrl.envs.llm.GSM8KEnv 方法) (torchrl.envs.llm.GSM8KPrepareQuestion 方法) (torchrl.envs.llm.GSM8KRewardParser 方法) (torchrl.envs.llm.IFEvalEnv 方法) (torchrl.envs.llm.IfEvalScorer 方法) (torchrl.envs.llm.LLMEnv 方法) (torchrl.envs.llm.LLMHashingEnv 方法) (torchrl.envs.llm.MLGymWrapper 方法) (torchrl.envs.llm.transforms.AddThinkingPrompt 方法) (torchrl.envs.llm.transforms.BrowserTransform 方法) (torchrl.envs.llm.transforms.DataLoadingPrimer 方法) (torchrl.envs.llm.transforms.KLComputation 方法) (torchrl.envs.llm.transforms.KLRewardTransform 方法) (torchrl.envs.llm.transforms.MCPToolTransform 方法) (torchrl.envs.llm.transforms.PolicyVersion 方法) (torchrl.envs.llm.transforms.PythonInterpreter 方法) (torchrl.envs.llm.transforms.RayDataLoadingPrimer 方法) (torchrl.envs.llm.transforms.RetrieveKL 方法) (torchrl.envs.llm.transforms.RetrieveLogProb 方法) (torchrl.envs.llm.transforms.TemplateTransform 方法) (torchrl.envs.llm.transforms.Tokenizer 方法) (torchrl.envs.LLMHashingEnv 方法) (torchrl.envs.ParallelEnv 方法) (torchrl.envs.PendulumEnv 方法) (torchrl.envs.ProcessorAsyncEnvPool 方法) (torchrl.envs.SerialEnv 方法) (torchrl.envs.ThreadingAsyncEnvPool 方法) (torchrl.envs.TicTacToeEnv 方法) (torchrl.envs.transforms.rb_transforms.MultiStepTransform 方法) (torchrl.modules.llm.LLMWrapperBase 方法) (torchrl.modules.llm.TransformersWrapper 方法) (torchrl.modules.llm.vLLMWrapper 方法) (torchrl.objectives.llm.GRPOLoss 方法) (torchrl.objectives.llm.MCAdvantage 方法) (torchrl.objectives.llm.SFTLoss 方法) DoubleToFloat (torchrl.envs.transforms 中的類) DoubleToFloatConfig (torchrl.trainers.algorithms.configs.transforms 中的類) DQNLoss (torchrl.objectives 中的類) DreamerActor (torchrl.modules 中的類) DreamerActorLoss (torchrl.objectives 中的類) DreamerDecoder() (在 torchrl.envs.model_based.dreamer 模組中) DreamerEnv() (在 torchrl.envs.model_based.dreamer 模組中) DreamerModelLoss (torchrl.objectives 中的類) DreamerValueLoss (torchrl.objectives 中的類) DTActor (torchrl.modules 中的類) DTLoss (torchrl.objectives 中的類) DTypeCastTransform (torchrl.envs.transforms 中的類) DTypeCastTransformConfig (torchrl.trainers.algorithms.configs.transforms 中的類) DuelingCnnDQNet (torchrl.modules 中的類) dump() (torchrl.data.datasets.AtariDQNExperienceReplay 方法) (torchrl.data.datasets.BaseDatasetExperienceReplay 方法) (torchrl.data.datasets.D4RLExperienceReplay 方法) (torchrl.data.datasets.GenDGRLExperienceReplay 方法) (torchrl.data.datasets.MinariExperienceReplay 方法) (torchrl.data.datasets.OpenMLExperienceReplay 方法) (torchrl.data.datasets.OpenXExperienceReplay 方法) (torchrl.data.datasets.RobosetExperienceReplay 方法) (torchrl.data.datasets.VD4RLExperienceReplay 方法) (torchrl.data.PrioritizedReplayBuffer 方法) (torchrl.data.RayReplayBuffer 方法) (torchrl.data.RemoteTensorDictReplayBuffer 方法) (torchrl.data.replay_buffers.CompressedListStorage 方法) (torchrl.data.replay_buffers.LazyMemmapStorage 方法) (torchrl.data.replay_buffers.LazyStackStorage 方法) (torchrl.data.replay_buffers.LazyTensorStorage 方法) (torchrl.data.replay_buffers.ListStorage 方法) (torchrl.data.replay_buffers.ReplayBufferEnsemble 方法) (torchrl.data.replay_buffers.Storage 方法) (torchrl.data.replay_buffers.StorageEnsemble 方法) (torchrl.data.replay_buffers.TensorStorage 方法) (torchrl.data.ReplayBuffer 方法) (torchrl.data.TensorDictPrioritizedReplayBuffer 方法) (torchrl.data.TensorDictReplayBuffer 方法) (torchrl.envs.llm.transforms.RayDataLoadingPrimer 方法) dumps() (torchrl.data.datasets.AtariDQNExperienceReplay 方法) (torchrl.data.datasets.BaseDatasetExperienceReplay 方法) (torchrl.data.datasets.D4RLExperienceReplay 方法) (torchrl.data.datasets.GenDGRLExperienceReplay 方法) (torchrl.data.datasets.MinariExperienceReplay 方法) (torchrl.data.datasets.OpenMLExperienceReplay 方法) (torchrl.data.datasets.OpenXExperienceReplay 方法) (torchrl.data.datasets.RobosetExperienceReplay 方法) (torchrl.data.datasets.VD4RLExperienceReplay 方法) (torchrl.data.llm.ContentBase 方法) (torchrl.data.llm.History 方法) (torchrl.data.PairwiseDataset 方法) (torchrl.data.PrioritizedReplayBuffer 方法) (torchrl.data.PromptData 方法) (torchrl.data.RayReplayBuffer 方法) (torchrl.data.RemoteTensorDictReplayBuffer 方法) (torchrl.data.replay_buffers.CompressedListStorageCheckpointer 方法) (torchrl.data.replay_buffers.ReplayBufferEnsemble 方法) (torchrl.data.ReplayBuffer 方法) (torchrl.data.RewardData 方法) (torchrl.data.TensorDictPrioritizedReplayBuffer 方法) (torchrl.data.TensorDictReplayBuffer 方法) (torchrl.data.Tree 方法) (torchrl.envs.llm.IFEvalScoreData 方法) (torchrl.modules.llm.ChatHistory 方法) (torchrl.modules.llm.LogProbs 方法) (torchrl.modules.llm.Masks 方法) (torchrl.modules.llm.Text 方法) (torchrl.modules.llm.Tokens 方法) (torchrl.objectives.llm.GRPOLossOutput 方法) (torchrl.objectives.llm.SFTLossOutput 方法) E edges() (torchrl.data.Tree 方法) EGreedyModule (torchrl.modules 中的類) empty() (torchrl.data.Composite 方法) (torchrl.data.CompositeSpec 方法) (torchrl.data.datasets.AtariDQNExperienceReplay 方法) (torchrl.data.datasets.BaseDatasetExperienceReplay 方法) (torchrl.data.datasets.D4RLExperienceReplay 方法) (torchrl.data.datasets.GenDGRLExperienceReplay 方法) (torchrl.data.datasets.MinariExperienceReplay 方法) (torchrl.data.datasets.OpenMLExperienceReplay 方法) (torchrl.data.datasets.OpenXExperienceReplay 方法) (torchrl.data.datasets.RobosetExperienceReplay 方法) (torchrl.data.datasets.VD4RLExperienceReplay 方法) (torchrl.data.LazyStackedCompositeSpec 方法) (torchrl.data.PrioritizedReplayBuffer 方法) (torchrl.data.RayReplayBuffer 方法) (torchrl.data.RemoteTensorDictReplayBuffer 方法) (torchrl.data.replay_buffers.ReplayBufferEnsemble 方法) (torchrl.data.ReplayBuffer 方法) (torchrl.data.StackedComposite 方法) (torchrl.data.TensorDictPrioritizedReplayBuffer 方法) (torchrl.data.TensorDictReplayBuffer 方法) empty_cache() (torchrl.envs.AsyncEnvPool 方法) (torchrl.envs.ChessEnv 方法) (torchrl.envs.EnvBase 方法) (torchrl.envs.GymLikeEnv 方法) (torchrl.envs.llm.ChatEnv 方法) (torchrl.envs.llm.DatasetChatEnv 方法) (torchrl.envs.llm.GSM8KEnv 方法) (torchrl.envs.llm.IFEvalEnv 方法) (torchrl.envs.llm.LLMEnv 方法) (torchrl.envs.llm.LLMHashingEnv 方法) (torchrl.envs.llm.MLGymWrapper 方法) (torchrl.envs.llm.transforms.RayDataLoadingPrimer 方法) (torchrl.envs.LLMHashingEnv 方法) (torchrl.envs.ParallelEnv 方法) (torchrl.envs.PendulumEnv 方法) (torchrl.envs.ProcessorAsyncEnvPool 方法) (torchrl.envs.SerialEnv 方法) (torchrl.envs.ThreadingAsyncEnvPool 方法) (torchrl.envs.TicTacToeEnv 方法) (torchrl.envs.transforms.TransformedEnv 方法) encode() (torchrl.data.Binary 方法) (torchrl.data.BinaryDiscreteTensorSpec 方法) (torchrl.data.Bounded 方法) (torchrl.data.BoundedTensorSpec 方法) (torchrl.data.Categorical 方法) (torchrl.data.Composite 方法) (torchrl.data.CompositeSpec 方法) (torchrl.data.DiscreteTensorSpec 方法) (torchrl.data.LazyStackedCompositeSpec 方法) (torchrl.data.LazyStackedTensorSpec 方法) (torchrl.data.MultiCategorical 方法) (torchrl.data.MultiDiscreteTensorSpec 方法) (torchrl.data.MultiOneHot 方法) (torchrl.data.MultiOneHotDiscreteTensorSpec 方法) (torchrl.data.NonTensor 方法) (torchrl.data.NonTensorSpec 方法) (torchrl.data.OneHot 方法) (torchrl.data.OneHotDiscreteTensorSpec 方法) (torchrl.data.Stacked 方法) (torchrl.data.StackedComposite 方法) (torchrl.data.TensorSpec 方法) (torchrl.data.Unbounded 方法) (torchrl.data.UnboundedContinuous 方法) (torchrl.data.UnboundedContinuousTensorSpec 方法) (torchrl.data.UnboundedDiscrete 方法) (torchrl.data.UnboundedDiscreteTensorSpec 方法) endless_dataloader (torchrl.envs.llm.transforms.RayDataLoadingPrimer 屬性) EndOfLifeTransform (torchrl.envs.transforms 中的類) EndOfLifeTransformConfig (torchrl.trainers.algorithms.configs.transforms 中的類) entropy() (torchrl.modules.LLMMaskedCategorical 方法) (torchrl.modules.MaskedCategorical 方法) (torchrl.modules.OneHotCategorical 方法) enumerate() (torchrl.data.Binary 方法) (torchrl.data.BinaryDiscreteTensorSpec 方法) (torchrl.data.Bounded 方法) (torchrl.data.BoundedTensorSpec 方法) (torchrl.data.Categorical 方法) (torchrl.data.Composite 方法) (torchrl.data.CompositeSpec 方法) (torchrl.data.DiscreteTensorSpec 方法) (torchrl.data.LazyStackedCompositeSpec 方法) (torchrl.data.LazyStackedTensorSpec 方法) (torchrl.data.MultiCategorical 方法) (torchrl.data.MultiDiscreteTensorSpec 方法) (torchrl.data.MultiOneHot 方法) (torchrl.data.MultiOneHotDiscreteTensorSpec 方法) (torchrl.data.NonTensor 方法) (torchrl.data.NonTensorSpec 方法) (torchrl.data.OneHot 方法) (torchrl.data.OneHotDiscreteTensorSpec 方法) (torchrl.data.Stacked 方法) (torchrl.data.StackedComposite 方法) (torchrl.data.TensorSpec 方法) (torchrl.data.Unbounded 方法) (torchrl.data.UnboundedContinuous 方法) (torchrl.data.UnboundedContinuousTensorSpec 方法) (torchrl.data.UnboundedDiscrete 方法) (torchrl.data.UnboundedDiscreteTensorSpec 方法) env_batch_sizes (torchrl.envs.AsyncEnvPool 屬性) (torchrl.envs.ProcessorAsyncEnvPool 屬性) (torchrl.envs.ThreadingAsyncEnvPool 屬性) EnvBase (torchrl.envs 中的類) EnvConfig (torchrl.trainers.algorithms.configs.envs 中的類) EnvCreator (torchrl.envs 中的類) EnvLibsConfig (torchrl.trainers.algorithms.configs.envs_libs 中的類) EnvMetaData (torchrl.envs 中的類) erase_memoize_cache() (torchrl.data.Binary 方法) (torchrl.data.BinaryDiscreteTensorSpec 方法) (torchrl.data.Bounded 方法) (torchrl.data.BoundedTensorSpec 方法) (torchrl.data.Categorical 方法) (torchrl.data.Composite 方法) (torchrl.data.CompositeSpec 方法) (torchrl.data.DiscreteTensorSpec 方法) (torchrl.data.LazyStackedCompositeSpec 方法) (torchrl.data.LazyStackedTensorSpec 方法) (torchrl.data.MultiCategorical 方法) (torchrl.data.MultiDiscreteTensorSpec 方法) (torchrl.data.MultiOneHot 方法) (torchrl.data.MultiOneHotDiscreteTensorSpec 方法) (torchrl.data.NonTensor 方法) (torchrl.data.NonTensorSpec 方法) (torchrl.data.OneHot 方法) (torchrl.data.OneHotDiscreteTensorSpec 方法) (torchrl.data.Stacked 方法) (torchrl.data.StackedComposite 方法) (torchrl.data.TensorSpec 方法) (torchrl.data.Unbounded 方法) (torchrl.data.UnboundedContinuous 方法) (torchrl.data.UnboundedContinuousTensorSpec 方法) (torchrl.data.UnboundedDiscrete 方法) (torchrl.data.UnboundedDiscreteTensorSpec 方法) eval() (torchrl.data.BinaryToDecimal 方法) (torchrl.data.DensifyReward 方法) (torchrl.data.HashToInt 方法) (torchrl.data.llm.TopKRewardSelector 方法) (torchrl.data.MultiStep 方法) (torchrl.data.QueryModule 方法) (torchrl.data.RandomProjectionHash 方法) (torchrl.data.SipHash 方法) (torchrl.data.TensorDictMap 方法) (torchrl.envs.AsyncEnvPool 方法) (torchrl.envs.ChessEnv 方法) (torchrl.envs.EnvBase 方法) (torchrl.envs.GymLikeEnv 方法) (torchrl.envs.llm.ChatEnv 方法) (torchrl.envs.llm.DatasetChatEnv 方法) (torchrl.envs.llm.GSM8KEnv 方法) (torchrl.envs.llm.GSM8KPrepareQuestion 方法) (torchrl.envs.llm.GSM8KRewardParser 方法) (torchrl.envs.llm.IFEvalEnv 方法) (torchrl.envs.llm.IfEvalScorer 方法) (torchrl.envs.llm.LLMEnv 方法) (torchrl.envs.llm.LLMHashingEnv 方法) (torchrl.envs.llm.MLGymWrapper 方法) (torchrl.envs.llm.transforms.AddThinkingPrompt 方法) (torchrl.envs.llm.transforms.BrowserTransform 方法) (torchrl.envs.llm.transforms.DataLoadingPrimer 方法) (torchrl.envs.llm.transforms.KLComputation 方法) (torchrl.envs.llm.transforms.KLRewardTransform 方法) (torchrl.envs.llm.transforms.MCPToolTransform 方法) (torchrl.envs.llm.transforms.PolicyVersion 方法) (torchrl.envs.llm.transforms.PythonInterpreter 方法) (torchrl.envs.llm.transforms.RayDataLoadingPrimer 方法) (torchrl.envs.llm.transforms.RetrieveKL 方法) (torchrl.envs.llm.transforms.RetrieveLogProb 方法) (torchrl.envs.llm.transforms.TemplateTransform 方法) (torchrl.envs.llm.transforms.Tokenizer 方法) (torchrl.envs.LLMHashingEnv 方法) (torchrl.envs.ParallelEnv 方法) (torchrl.envs.PendulumEnv 方法) (torchrl.envs.ProcessorAsyncEnvPool 方法) (torchrl.envs.SerialEnv 方法) (torchrl.envs.ThreadingAsyncEnvPool 方法) (torchrl.envs.TicTacToeEnv 方法) (torchrl.envs.transforms.rb_transforms.MultiStepTransform 方法) (torchrl.envs.transforms.TransformedEnv 方法) (torchrl.modules.llm.LLMWrapperBase 方法) (torchrl.modules.llm.TransformersWrapper 方法) (torchrl.modules.llm.vLLMWrapper 方法) (torchrl.objectives.llm.GRPOLoss 方法) (torchrl.objectives.llm.MCAdvantage 方法) (torchrl.objectives.llm.SFTLoss 方法) ExcludeTransform (torchrl.envs.transforms 中的類) ExcludeTransformConfig (torchrl.trainers.algorithms.configs.transforms 中的類) expand() (torchrl.data.Binary 方法) (torchrl.data.BinaryDiscreteTensorSpec 方法) (torchrl.data.Bounded 方法) (torchrl.data.BoundedTensorSpec 方法) (torchrl.data.Categorical 方法) (torchrl.data.Composite 方法) (torchrl.data.CompositeSpec 方法) (torchrl.data.DiscreteTensorSpec 方法) (torchrl.data.LazyStackedCompositeSpec 方法) (torchrl.data.LazyStackedTensorSpec 方法) (torchrl.data.MultiCategorical 方法) (torchrl.data.MultiDiscreteTensorSpec 方法) (torchrl.data.MultiOneHot 方法) (torchrl.data.MultiOneHotDiscreteTensorSpec 方法) (torchrl.data.NonTensor 方法) (torchrl.data.NonTensorSpec 方法) (torchrl.data.OneHot 方法) (torchrl.data.OneHotDiscreteTensorSpec 方法) (torchrl.data.Stacked 方法) (torchrl.data.StackedComposite 方法) (torchrl.data.TensorSpec 方法) (torchrl.data.Unbounded 方法) (torchrl.data.UnboundedContinuous 方法) (torchrl.data.UnboundedContinuousTensorSpec 方法) (torchrl.data.UnboundedDiscrete 方法) (torchrl.data.UnboundedDiscreteTensorSpec 方法) (torchrl.modules.Delta 方法) exploration_type() (在 torchrl.envs 模組中) extend() (torchrl.data.datasets.AtariDQNExperienceReplay 方法) (torchrl.data.datasets.BaseDatasetExperienceReplay 方法) (torchrl.data.datasets.D4RLExperienceReplay 方法) (torchrl.data.datasets.GenDGRLExperienceReplay 方法) (torchrl.data.datasets.MinariExperienceReplay 方法) (torchrl.data.datasets.OpenMLExperienceReplay 方法) (torchrl.data.datasets.OpenXExperienceReplay 方法) (torchrl.data.datasets.RobosetExperienceReplay 方法) (torchrl.data.datasets.VD4RLExperienceReplay 方法) (torchrl.data.MCTSForest 方法) (torchrl.data.PrioritizedReplayBuffer 方法) (torchrl.data.RayReplayBuffer 方法) (torchrl.data.RemoteTensorDictReplayBuffer 方法) (torchrl.data.replay_buffers.ImmutableDatasetWriter 方法) (torchrl.data.replay_buffers.ReplayBufferEnsemble 方法) (torchrl.data.replay_buffers.RoundRobinWriter 方法) (torchrl.data.replay_buffers.TensorDictMaxValueWriter 方法) (torchrl.data.replay_buffers.TensorDictRoundRobinWriter 方法) (torchrl.data.replay_buffers.Writer 方法) (torchrl.data.replay_buffers.WriterEnsemble 方法) (torchrl.data.ReplayBuffer 方法) (torchrl.data.TensorDictPrioritizedReplayBuffer 方法) (torchrl.data.TensorDictReplayBuffer 方法) extra_repr() (torchrl.data.BinaryToDecimal 方法) (torchrl.data.DensifyReward 方法) (torchrl.data.HashToInt 方法) (torchrl.data.llm.TopKRewardSelector 方法) (torchrl.data.MultiStep 方法) (torchrl.data.QueryModule 方法) (torchrl.data.RandomProjectionHash 方法) (torchrl.data.SipHash 方法) (torchrl.data.TensorDictMap 方法) (torchrl.envs.AsyncEnvPool 方法) (torchrl.envs.ChessEnv 方法) (torchrl.envs.EnvBase 方法) (torchrl.envs.GymLikeEnv 方法) (torchrl.envs.llm.ChatEnv 方法) (torchrl.envs.llm.DatasetChatEnv 方法) (torchrl.envs.llm.GSM8KEnv 方法) (torchrl.envs.llm.GSM8KPrepareQuestion 方法) (torchrl.envs.llm.GSM8KRewardParser 方法) (torchrl.envs.llm.IFEvalEnv 方法) (torchrl.envs.llm.IfEvalScorer 方法) (torchrl.envs.llm.LLMEnv 方法) (torchrl.envs.llm.LLMHashingEnv 方法) (torchrl.envs.llm.MLGymWrapper 方法) (torchrl.envs.llm.transforms.AddThinkingPrompt 方法) (torchrl.envs.llm.transforms.BrowserTransform 方法) (torchrl.envs.llm.transforms.DataLoadingPrimer 方法) (torchrl.envs.llm.transforms.KLComputation 方法) (torchrl.envs.llm.transforms.KLRewardTransform 方法) (torchrl.envs.llm.transforms.MCPToolTransform 方法) (torchrl.envs.llm.transforms.PolicyVersion 方法) (torchrl.envs.llm.transforms.PythonInterpreter 方法) (torchrl.envs.llm.transforms.RayDataLoadingPrimer 方法) (torchrl.envs.llm.transforms.RetrieveKL 方法) (torchrl.envs.llm.transforms.RetrieveLogProb 方法) (torchrl.envs.llm.transforms.TemplateTransform 方法) (torchrl.envs.llm.transforms.Tokenizer 方法) (torchrl.envs.LLMHashingEnv 方法) (torchrl.envs.ParallelEnv 方法) (torchrl.envs.PendulumEnv 方法) (torchrl.envs.ProcessorAsyncEnvPool 方法) (torchrl.envs.SerialEnv 方法) (torchrl.envs.ThreadingAsyncEnvPool 方法) (torchrl.envs.TicTacToeEnv 方法) (torchrl.envs.transforms.rb_transforms.MultiStepTransform 方法) (torchrl.modules.llm.LLMWrapperBase 方法) (torchrl.modules.llm.TransformersWrapper 方法) (torchrl.modules.llm.vLLMWrapper 方法) (torchrl.objectives.llm.GRPOLoss 方法) (torchrl.objectives.llm.MCAdvantage 方法) (torchrl.objectives.llm.SFTLoss 方法) extract_tags() (torchrl.envs.llm.GSM8KRewardParser 靜態方法) F fake_tensordict() (torchrl.envs.AsyncEnvPool 方法) (torchrl.envs.ChessEnv 方法) (torchrl.envs.EnvBase 方法) (torchrl.envs.GymLikeEnv 方法) (torchrl.envs.llm.ChatEnv 方法) (torchrl.envs.llm.DatasetChatEnv 方法) (torchrl.envs.llm.GSM8KEnv 方法) (torchrl.envs.llm.IFEvalEnv 方法) (torchrl.envs.llm.LLMEnv 方法) (torchrl.envs.llm.LLMHashingEnv 方法) (torchrl.envs.llm.MLGymWrapper 方法) (torchrl.envs.LLMHashingEnv 方法) (torchrl.envs.ParallelEnv 方法) (torchrl.envs.PendulumEnv 方法) (torchrl.envs.ProcessorAsyncEnvPool 方法) (torchrl.envs.SerialEnv 方法) (torchrl.envs.ThreadingAsyncEnvPool 方法) (torchrl.envs.TicTacToeEnv 方法) fast_encoding() (torchrl.envs.GymLikeEnv 方法) (torchrl.envs.llm.MLGymWrapper 方法) fields() (torchrl.data.llm.ContentBase 類方法) (torchrl.data.llm.History 類方法) (torchrl.data.PairwiseDataset 類方法) (torchrl.data.PromptData 類方法) (torchrl.data.RewardData 類方法) (torchrl.data.Tree 類方法) (torchrl.envs.llm.IFEvalScoreData 類方法) (torchrl.modules.llm.ChatHistory 類方法) (torchrl.modules.llm.LogProbs 類方法) (torchrl.modules.llm.Masks 類方法) (torchrl.modules.llm.Text 類方法) (torchrl.modules.llm.Tokens 類方法) (torchrl.objectives.llm.GRPOLossOutput 類方法) (torchrl.objectives.llm.SFTLossOutput 類方法) FiniteTensorDictCheck (torchrl.envs.transforms 中的類) FiniteTensorDictCheckConfig (torchrl.trainers.algorithms.configs.transforms 中的類) fit() (torchrl.data.RandomProjectionHash 方法) Flat2TED (torchrl.data 中的類) FlatStorageCheckpointer (torchrl.data.replay_buffers 中的類) flatten() (torchrl.data.Binary 方法) (torchrl.data.BinaryDiscreteTensorSpec 方法) (torchrl.data.Bounded 方法) (torchrl.data.BoundedTensorSpec 方法) (torchrl.data.Categorical 方法) (torchrl.data.Composite 方法) (torchrl.data.CompositeSpec 方法) (torchrl.data.DiscreteTensorSpec 方法) (torchrl.data.LazyStackedCompositeSpec 方法) (torchrl.data.LazyStackedTensorSpec 方法) (torchrl.data.MultiCategorical 方法) (torchrl.data.MultiDiscreteTensorSpec 方法) (torchrl.data.MultiOneHot 方法) (torchrl.data.MultiOneHotDiscreteTensorSpec 方法) (torchrl.data.NonTensor 方法) (torchrl.data.NonTensorSpec 方法) (torchrl.data.OneHot 方法) (torchrl.data.OneHotDiscreteTensorSpec 方法) (torchrl.data.Stacked 方法) (torchrl.data.StackedComposite 方法) (torchrl.data.TensorSpec 方法) (torchrl.data.Unbounded 方法) (torchrl.data.UnboundedContinuous 方法) (torchrl.data.UnboundedContinuousTensorSpec 方法) (torchrl.data.UnboundedDiscrete 方法) (torchrl.data.UnboundedDiscreteTensorSpec 方法) FlattenObservation (torchrl.envs.transforms 中的類) FlattenObservationConfig (torchrl.trainers.algorithms.configs.transforms 中的類) float() (torchrl.data.BinaryToDecimal 方法) (torchrl.data.DensifyReward 方法) (torchrl.data.HashToInt 方法) (torchrl.data.llm.TopKRewardSelector 方法) (torchrl.data.MultiStep 方法) (torchrl.data.QueryModule 方法) (torchrl.data.RandomProjectionHash 方法) (torchrl.data.SipHash 方法) (torchrl.data.TensorDictMap 方法) (torchrl.envs.AsyncEnvPool 方法) (torchrl.envs.ChessEnv 方法) (torchrl.envs.EnvBase 方法) (torchrl.envs.GymLikeEnv 方法) (torchrl.envs.llm.ChatEnv 方法) (torchrl.envs.llm.DatasetChatEnv 方法) (torchrl.envs.llm.GSM8KEnv 方法) (torchrl.envs.llm.GSM8KPrepareQuestion 方法) (torchrl.envs.llm.GSM8KRewardParser 方法) (torchrl.envs.llm.IFEvalEnv 方法) (torchrl.envs.llm.IfEvalScorer 方法) (torchrl.envs.llm.LLMEnv 方法) (torchrl.envs.llm.LLMHashingEnv 方法) (torchrl.envs.llm.MLGymWrapper 方法) (torchrl.envs.llm.transforms.AddThinkingPrompt 方法) (torchrl.envs.llm.transforms.BrowserTransform 方法) (torchrl.envs.llm.transforms.DataLoadingPrimer 方法) (torchrl.envs.llm.transforms.KLComputation 方法) (torchrl.envs.llm.transforms.KLRewardTransform 方法) (torchrl.envs.llm.transforms.MCPToolTransform 方法) (torchrl.envs.llm.transforms.PolicyVersion 方法) (torchrl.envs.llm.transforms.PythonInterpreter 方法) (torchrl.envs.llm.transforms.RayDataLoadingPrimer 方法) (torchrl.envs.llm.transforms.RetrieveKL 方法) (torchrl.envs.llm.transforms.RetrieveLogProb 方法) (torchrl.envs.llm.transforms.TemplateTransform 方法) (torchrl.envs.llm.transforms.Tokenizer 方法) (torchrl.envs.LLMHashingEnv 方法) (torchrl.envs.ParallelEnv 方法) (torchrl.envs.PendulumEnv 方法) (torchrl.envs.ProcessorAsyncEnvPool 方法) (torchrl.envs.SerialEnv 方法) (torchrl.envs.ThreadingAsyncEnvPool 方法) (torchrl.envs.TicTacToeEnv 方法) (torchrl.envs.transforms.rb_transforms.MultiStepTransform 方法) (torchrl.modules.llm.LLMWrapperBase 方法) (torchrl.modules.llm.TransformersWrapper 方法) (torchrl.modules.llm.vLLMWrapper 方法) (torchrl.objectives.llm.GRPOLoss 方法) (torchrl.objectives.llm.MCAdvantage 方法) (torchrl.objectives.llm.SFTLoss 方法) forward() (torchrl.data.BinaryToDecimal 方法) (torchrl.data.DensifyReward 方法) (torchrl.data.HashToInt 方法) (torchrl.data.llm.TopKRewardSelector 方法) (torchrl.data.MultiStep 方法) (torchrl.data.QueryModule 方法) (torchrl.data.RandomProjectionHash 方法) (torchrl.data.SipHash 方法) (torchrl.data.TensorDictMap 方法) (torchrl.envs.AsyncEnvPool 方法) (torchrl.envs.ChessEnv 方法) (torchrl.envs.EnvBase 方法) (torchrl.envs.GymLikeEnv 方法) (torchrl.envs.llm.ChatEnv 方法) (torchrl.envs.llm.DatasetChatEnv 方法) (torchrl.envs.llm.GSM8KEnv 方法) (torchrl.envs.llm.GSM8KPrepareQuestion 方法) (torchrl.envs.llm.GSM8KRewardParser 方法) (torchrl.envs.llm.IFEvalEnv 方法) (torchrl.envs.llm.IfEvalScorer 方法) (torchrl.envs.llm.LLMEnv 方法) (torchrl.envs.llm.LLMHashingEnv 方法) (torchrl.envs.llm.MLGymWrapper 方法) (torchrl.envs.llm.transforms.AddThinkingPrompt 方法) (torchrl.envs.llm.transforms.BrowserTransform 方法) (torchrl.envs.llm.transforms.DataLoadingPrimer 方法) (torchrl.envs.llm.transforms.KLComputation 方法) (torchrl.envs.llm.transforms.KLRewardTransform 方法) (torchrl.envs.llm.transforms.MCPToolTransform 方法) (torchrl.envs.llm.transforms.PolicyVersion 方法) (torchrl.envs.llm.transforms.PythonInterpreter 方法) (torchrl.envs.llm.transforms.RayDataLoadingPrimer 方法) (torchrl.envs.llm.transforms.RetrieveKL 方法) (torchrl.envs.llm.transforms.RetrieveLogProb 方法) (torchrl.envs.llm.transforms.TemplateTransform 方法) (torchrl.envs.llm.transforms.Tokenizer 方法) (torchrl.envs.LLMHashingEnv 方法) (torchrl.envs.ParallelEnv 方法) (torchrl.envs.PendulumEnv 方法) (torchrl.envs.ProcessorAsyncEnvPool 方法) (torchrl.envs.SerialEnv 方法) (torchrl.envs.ThreadingAsyncEnvPool 方法) (torchrl.envs.TicTacToeEnv 方法) (torchrl.envs.transforms.ActionMask 方法) (torchrl.envs.transforms.AutoResetTransform 方法) (torchrl.envs.transforms.BatchSizeTransform 方法) (torchrl.envs.transforms.BurnInTransform 方法) (torchrl.envs.transforms.CatFrames 方法) (torchrl.envs.transforms.CatTensors 方法) (torchrl.envs.transforms.Compose 方法) (torchrl.envs.transforms.ConditionalPolicySwitch 方法) (torchrl.envs.transforms.ConditionalSkip 方法) (torchrl.envs.transforms.DeviceCastTransform 方法) (torchrl.envs.transforms.DTypeCastTransform 方法) (torchrl.envs.transforms.EndOfLifeTransform 方法) (torchrl.envs.transforms.ExcludeTransform 方法) (torchrl.envs.transforms.FiniteTensorDictCheck 方法) (torchrl.envs.transforms.FlattenObservation 方法) (torchrl.envs.transforms.FrameSkipTransform 方法) (torchrl.envs.transforms.InitTracker 方法) (torchrl.envs.transforms.KLRewardTransform 方法) (torchrl.envs.transforms.PinMemoryTransform 方法) (torchrl.envs.transforms.RandomCropTensorDict 方法) (torchrl.envs.transforms.rb_transforms.MultiStepTransform 方法) (torchrl.envs.transforms.RemoveEmptySpecs 方法) (torchrl.envs.transforms.RenameTransform 方法) (torchrl.envs.transforms.Reward2GoTransform 方法) (torchrl.envs.transforms.RewardSum 方法) (torchrl.envs.transforms.SelectTransform 方法) (torchrl.envs.transforms.Stack 方法) (torchrl.envs.transforms.StepCounter 方法) (torchrl.envs.transforms.TargetReturn 方法) (torchrl.envs.transforms.TensorDictPrimer 方法) (torchrl.envs.transforms.TimeMaxPool 方法) (torchrl.envs.transforms.Timer 方法) (torchrl.envs.transforms.Tokenizer 方法) (torchrl.envs.transforms.TrajCounter 方法) (torchrl.envs.transforms.Transform 方法) (torchrl.envs.transforms.VC1Transform 方法) (torchrl.envs.transforms.VecGymEnvTransform 方法) (torchrl.envs.transforms.VecNorm 方法) (torchrl.envs.transforms.VIPRewardTransform 方法) (torchrl.modules.AdditiveGaussianModule 方法) (torchrl.modules.BatchRenorm1d 方法) (torchrl.modules.ConsistentDropout 方法) (torchrl.modules.ConsistentDropoutModule 方法) (torchrl.modules.Conv3dNet 方法) (torchrl.modules.ConvNet 方法) (torchrl.modules.DdpgCnnActor 方法) (torchrl.modules.DdpgCnnQNet 方法) (torchrl.modules.DdpgMlpActor 方法) (torchrl.modules.DdpgMlpQNet 方法) (torchrl.modules.DecisionTransformer 方法) (torchrl.modules.DistributionalDQNnet 方法) (torchrl.modules.DreamerActor 方法) (torchrl.modules.DTActor 方法) (torchrl.modules.DuelingCnnDQNet 方法) (torchrl.modules.EGreedyModule 方法) (torchrl.modules.GRU 方法) (torchrl.modules.GRUCell 方法) (torchrl.modules.GRUModule 方法) (torchrl.modules.llm.LLMWrapperBase 方法) (torchrl.modules.llm.TransformersWrapper 方法) (torchrl.modules.llm.vLLMWrapper 方法) (torchrl.modules.LSTM 方法) (torchrl.modules.LSTMCell 方法) (torchrl.modules.LSTMModule 方法) (torchrl.modules.MLP 方法) (torchrl.modules.MPCPlannerBase 方法) (torchrl.modules.MultiAgentNetBase 方法) (torchrl.modules.ObsDecoder 方法) (torchrl.modules.ObsEncoder 方法) (torchrl.modules.OnlineDTActor 方法) (torchrl.modules.OrnsteinUhlenbeckProcessModule 方法) (torchrl.modules.RSSMPosterior 方法) (torchrl.modules.RSSMPrior 方法) (torchrl.modules.SqueezeLayer 方法) (torchrl.modules.tensordict_module.DecisionTransformerInferenceWrapper 方法) (torchrl.modules.tensordict_module.DistributionalQValueModule 方法) (torchrl.modules.tensordict_module.MultiStepActorWrapper 方法) (torchrl.modules.tensordict_module.QValueModule 方法) (torchrl.modules.tensordict_module.TanhModule 方法) (torchrl.modules.utils.biased_softplus 方法) (torchrl.modules.VmapModule 方法) (torchrl.objectives.A2CLoss 方法) (torchrl.objectives.ClipPPOLoss 方法) (torchrl.objectives.CQLLoss 方法) (torchrl.objectives.CrossQLoss 方法) (torchrl.objectives.DDPGLoss 方法) (torchrl.objectives.DiscreteCQLLoss 方法) (torchrl.objectives.DiscreteIQLLoss 方法) (torchrl.objectives.DiscreteSACLoss 方法) (torchrl.objectives.DistributionalDQNLoss 方法) (torchrl.objectives.DQNLoss 方法) (torchrl.objectives.DreamerActorLoss 方法) (torchrl.objectives.DreamerModelLoss 方法) (torchrl.objectives.DreamerValueLoss 方法) (torchrl.objectives.DTLoss 方法) (torchrl.objectives.GAILLoss 方法) (torchrl.objectives.IQLLoss 方法) (torchrl.objectives.KLPENPPOLoss 方法) (torchrl.objectives.llm.GRPOLoss 方法) (torchrl.objectives.llm.MCAdvantage 方法) (torchrl.objectives.llm.SFTLoss 方法) (torchrl.objectives.LossModule 方法) (torchrl.objectives.multiagent.QMixerLoss 方法) (torchrl.objectives.OnlineDTLoss 方法) (torchrl.objectives.PPOLoss 方法) (torchrl.objectives.REDQLoss 方法) (torchrl.objectives.ReinforceLoss 方法) (torchrl.objectives.SACLoss 方法) (torchrl.objectives.TD3BCLoss 方法) (torchrl.objectives.TD3Loss 方法) (torchrl.objectives.value.GAE 方法) (torchrl.objectives.value.TD0Estimator 方法) (torchrl.objectives.value.TD1Estimator 方法) (torchrl.objectives.value.TDLambdaEstimator 方法) (torchrl.objectives.value.ValueEstimatorBase 方法) FrameSkipTransform (torchrl.envs.transforms 中的類) FrameSkipTransformConfig (torchrl.trainers.algorithms.configs.transforms 中的類) freeze() (torchrl.envs.transforms.VecNorm 方法) (torchrl.envs.transforms.VecNormV2 方法), [1] from_any() (torchrl.data.llm.ContentBase 方法) (torchrl.data.llm.History 方法) (torchrl.data.Tree 方法) (torchrl.envs.llm.IFEvalScoreData 方法) (torchrl.modules.llm.ChatHistory 方法) (torchrl.modules.llm.LogProbs 方法) (torchrl.modules.llm.Masks 方法) (torchrl.modules.llm.Text 方法) (torchrl.modules.llm.Tokens 方法) (torchrl.objectives.llm.GRPOLossOutput 方法) (torchrl.objectives.llm.SFTLossOutput 方法) from_chats() (torchrl.data.llm.History 類方法) from_dataclass() (torchrl.data.llm.ContentBase 方法) (torchrl.data.llm.History 方法) (torchrl.data.Tree 方法) (torchrl.envs.llm.IFEvalScoreData 方法) (torchrl.modules.llm.ChatHistory 方法) (torchrl.modules.llm.LogProbs 方法) (torchrl.modules.llm.Masks 方法) (torchrl.modules.llm.Text 方法) (torchrl.modules.llm.Tokens 方法) (torchrl.objectives.llm.GRPOLossOutput 方法) (torchrl.objectives.llm.SFTLossOutput 方法) from_dataloader() (torchrl.envs.llm.ChatEnv 類方法) (torchrl.envs.llm.DatasetChatEnv 類方法) (torchrl.envs.llm.GSM8KEnv 類方法) (torchrl.envs.llm.IFEvalEnv 類方法) (torchrl.envs.llm.LLMEnv 類方法) (torchrl.envs.llm.LLMEnv 方法) from_dataset() (torchrl.data.PairwiseDataset 類方法) (torchrl.data.PromptData 類方法) from_h5() (torchrl.data.llm.ContentBase 方法) (torchrl.data.llm.History 方法) (torchrl.data.Tree 方法) (torchrl.envs.llm.IFEvalScoreData 方法) (torchrl.modules.llm.ChatHistory 方法) (torchrl.modules.llm.LogProbs 方法) (torchrl.modules.llm.Masks 方法) (torchrl.modules.llm.Text 方法) (torchrl.modules.llm.Tokens 方法) (torchrl.objectives.llm.GRPOLossOutput 方法) (torchrl.objectives.llm.SFTLossOutput 方法) from_modules() (torchrl.data.llm.ContentBase 方法) (torchrl.data.llm.History 方法) (torchrl.data.Tree 方法) (torchrl.envs.llm.IFEvalScoreData 方法) (torchrl.modules.llm.ChatHistory 方法) (torchrl.modules.llm.LogProbs 方法) (torchrl.modules.llm.Masks 方法) (torchrl.modules.llm.Text 方法) (torchrl.modules.llm.Tokens 方法) (torchrl.objectives.llm.GRPOLossOutput 方法) (torchrl.objectives.llm.SFTLossOutput 方法) from_namedtuple() (torchrl.data.llm.ContentBase 方法) (torchrl.data.llm.History 方法) (torchrl.data.Tree 方法) (torchrl.envs.llm.IFEvalScoreData 方法) (torchrl.modules.llm.ChatHistory 方法) (torchrl.modules.llm.LogProbs 方法) (torchrl.modules.llm.Masks 方法) (torchrl.modules.llm.Text 方法) (torchrl.modules.llm.Tokens 方法) (torchrl.objectives.llm.GRPOLossOutput 方法) (torchrl.objectives.llm.SFTLossOutput 方法) from_policy() (torchrl.collectors.distributed.DistributedWeightUpdater 類方法) (torchrl.collectors.distributed.RPCWeightUpdater 類方法) (torchrl.collectors.llm.vLLMUpdater 類方法) (torchrl.collectors.llm.vLLMUpdaterV2 類方法) (torchrl.collectors.MultiProcessedWeightUpdater 類方法) (torchrl.collectors.RayWeightUpdater 類方法) (torchrl.collectors.VanillaWeightUpdater 類方法) (torchrl.collectors.WeightUpdaterBase 類方法) (torchrl.collectors.WeightUpdaterBase 方法) from_pretrained() (torchrl.modules.llm.AsyncVLLM 類方法) from_pytree() (torchrl.data.llm.ContentBase 方法) (torchrl.data.llm.History 方法) (torchrl.data.Tree 方法) (torchrl.envs.llm.IFEvalScoreData 方法) (torchrl.modules.llm.ChatHistory 方法) (torchrl.modules.llm.LogProbs 方法) (torchrl.modules.llm.Masks 方法) (torchrl.modules.llm.Text 方法) (torchrl.modules.llm.Tokens 方法) (torchrl.objectives.llm.GRPOLossOutput 方法) (torchrl.objectives.llm.SFTLossOutput 方法) from_remote_init() (torchrl.data.llm.ContentBase 方法) (torchrl.data.llm.History 方法) (torchrl.data.Tree 方法) (torchrl.envs.llm.IFEvalScoreData 方法) (torchrl.modules.llm.ChatHistory 方法) (torchrl.modules.llm.LogProbs 方法) (torchrl.modules.llm.Masks 方法) (torchrl.modules.llm.Text 方法) (torchrl.modules.llm.Tokens 方法) (torchrl.objectives.llm.GRPOLossOutput 方法) (torchrl.objectives.llm.SFTLossOutput 方法) from_stateful_net() (torchrl.modules.MultiAgentNetBase 方法) (torchrl.objectives.llm.GRPOLoss 方法) (torchrl.objectives.llm.SFTLoss 方法) (torchrl.objectives.LossModule 方法) from_struct_array() (torchrl.data.llm.ContentBase 方法) (torchrl.data.llm.History 方法) (torchrl.data.Tree 方法) (torchrl.envs.llm.IFEvalScoreData 方法) (torchrl.modules.llm.ChatHistory 方法) (torchrl.modules.llm.LogProbs 方法) (torchrl.modules.llm.Masks 方法) (torchrl.modules.llm.Text 方法) (torchrl.modules.llm.Tokens 方法) (torchrl.objectives.llm.GRPOLossOutput 方法) (torchrl.objectives.llm.SFTLossOutput 方法) from_tensordict() (torchrl.data.llm.ContentBase 類方法) (torchrl.data.llm.History 類方法) (torchrl.data.PairwiseDataset 類方法) (torchrl.data.PromptData 類方法) (torchrl.data.RewardData 類方法) (torchrl.data.Tree 類方法) (torchrl.envs.llm.IFEvalScoreData 類方法) (torchrl.modules.llm.ChatHistory 類方法) (torchrl.modules.llm.LogProbs 類方法) (torchrl.modules.llm.Masks 類方法) (torchrl.modules.llm.Text 類方法) (torchrl.modules.llm.Tokens 類方法) (torchrl.objectives.llm.GRPOLossOutput 類方法) (torchrl.objectives.llm.SFTLossOutput 類方法) from_tensordict_pair() (torchrl.data.TensorDictMap 類方法) from_text() (torchrl.data.llm.History 類方法) from_tuple() (torchrl.data.llm.ContentBase 方法) (torchrl.data.llm.History 方法) (torchrl.data.Tree 方法) (torchrl.envs.llm.IFEvalScoreData 方法) (torchrl.modules.llm.ChatHistory 方法) (torchrl.modules.llm.LogProbs 方法) (torchrl.modules.llm.Masks 方法) (torchrl.modules.llm.Text 方法) (torchrl.modules.llm.Tokens 方法) (torchrl.objectives.llm.GRPOLossOutput 方法) (torchrl.objectives.llm.SFTLossOutput 方法) fromkeys() (torchrl.data.llm.ContentBase 方法) (torchrl.data.llm.History 方法) (torchrl.data.Tree 方法) (torchrl.envs.llm.IFEvalScoreData 方法) (torchrl.modules.llm.ChatHistory 方法) (torchrl.modules.llm.LogProbs 方法) (torchrl.modules.llm.Masks 方法) (torchrl.modules.llm.Text 方法) (torchrl.modules.llm.Tokens 方法) (torchrl.objectives.llm.GRPOLossOutput 方法) (torchrl.objectives.llm.SFTLossOutput 方法) frozen_copy() (torchrl.envs.transforms.VecNorm 方法) (torchrl.envs.transforms.VecNormV2 方法), [1] full_action_spec (torchrl.data.Tree 屬性) (torchrl.envs.AsyncEnvPool 屬性) (torchrl.envs.ChessEnv 屬性) (torchrl.envs.EnvBase 屬性) (torchrl.envs.GymLikeEnv 屬性) (torchrl.envs.llm.ChatEnv 屬性) (torchrl.envs.llm.DatasetChatEnv 屬性) (torchrl.envs.llm.GSM8KEnv 屬性) (torchrl.envs.llm.IFEvalEnv 屬性) (torchrl.envs.llm.LLMEnv 屬性) (torchrl.envs.llm.LLMHashingEnv 屬性) (torchrl.envs.llm.MLGymWrapper 屬性) (torchrl.envs.LLMHashingEnv 屬性) (torchrl.envs.ParallelEnv 屬性) (torchrl.envs.PendulumEnv 屬性) (torchrl.envs.ProcessorAsyncEnvPool 屬性) (torchrl.envs.SerialEnv 屬性) (torchrl.envs.ThreadingAsyncEnvPool 屬性) (torchrl.envs.TicTacToeEnv 屬性) full_action_spec_unbatched (torchrl.envs.AsyncEnvPool 屬性) (torchrl.envs.ChessEnv 屬性) (torchrl.envs.EnvBase 屬性) (torchrl.envs.GymLikeEnv 屬性) (torchrl.envs.llm.ChatEnv 屬性) (torchrl.envs.llm.DatasetChatEnv 屬性) (torchrl.envs.llm.GSM8KEnv 屬性) (torchrl.envs.llm.IFEvalEnv 屬性) (torchrl.envs.llm.LLMEnv 屬性) (torchrl.envs.llm.LLMHashingEnv 屬性) (torchrl.envs.llm.MLGymWrapper 屬性) (torchrl.envs.LLMHashingEnv 屬性) (torchrl.envs.ParallelEnv 屬性) (torchrl.envs.PendulumEnv 屬性) (torchrl.envs.ProcessorAsyncEnvPool 屬性) (torchrl.envs.SerialEnv 屬性) (torchrl.envs.ThreadingAsyncEnvPool 屬性) (torchrl.envs.TicTacToeEnv 屬性) full_done_spec (torchrl.data.Tree 屬性) (torchrl.envs.AsyncEnvPool 屬性) (torchrl.envs.ChessEnv 屬性) (torchrl.envs.EnvBase 屬性) (torchrl.envs.GymLikeEnv 屬性) (torchrl.envs.llm.ChatEnv 屬性) (torchrl.envs.llm.DatasetChatEnv 屬性) (torchrl.envs.llm.GSM8KEnv 屬性) (torchrl.envs.llm.IFEvalEnv 屬性) (torchrl.envs.llm.LLMEnv 屬性) (torchrl.envs.llm.LLMHashingEnv 屬性) (torchrl.envs.llm.MLGymWrapper 屬性) (torchrl.envs.LLMHashingEnv 屬性) (torchrl.envs.ParallelEnv 屬性) (torchrl.envs.PendulumEnv 屬性) (torchrl.envs.ProcessorAsyncEnvPool 屬性) (torchrl.envs.SerialEnv 屬性) (torchrl.envs.ThreadingAsyncEnvPool 屬性) (torchrl.envs.TicTacToeEnv 屬性) full_done_spec_unbatched (torchrl.envs.AsyncEnvPool 屬性) (torchrl.envs.ChessEnv 屬性) (torchrl.envs.EnvBase 屬性) (torchrl.envs.GymLikeEnv 屬性) (torchrl.envs.llm.ChatEnv 屬性) (torchrl.envs.llm.DatasetChatEnv 屬性) (torchrl.envs.llm.GSM8KEnv 屬性) (torchrl.envs.llm.IFEvalEnv 屬性) (torchrl.envs.llm.LLMEnv 屬性) (torchrl.envs.llm.LLMHashingEnv 屬性) (torchrl.envs.llm.MLGymWrapper 屬性) (torchrl.envs.LLMHashingEnv 屬性) (torchrl.envs.ParallelEnv 屬性) (torchrl.envs.PendulumEnv 屬性) (torchrl.envs.ProcessorAsyncEnvPool 屬性) (torchrl.envs.SerialEnv 屬性) (torchrl.envs.ThreadingAsyncEnvPool 屬性) (torchrl.envs.TicTacToeEnv 屬性) full_observation_spec (torchrl.data.Tree 屬性) full_observation_spec_unbatched (torchrl.envs.AsyncEnvPool 屬性) (torchrl.envs.ChessEnv 屬性) (torchrl.envs.EnvBase 屬性) (torchrl.envs.GymLikeEnv 屬性) (torchrl.envs.llm.ChatEnv 屬性) (torchrl.envs.llm.DatasetChatEnv 屬性) (torchrl.envs.llm.GSM8KEnv 屬性) (torchrl.envs.llm.IFEvalEnv 屬性) (torchrl.envs.llm.LLMEnv 屬性) (torchrl.envs.llm.LLMHashingEnv 屬性) (torchrl.envs.llm.MLGymWrapper 屬性) (torchrl.envs.LLMHashingEnv 屬性) (torchrl.envs.ParallelEnv 屬性) (torchrl.envs.PendulumEnv 屬性) (torchrl.envs.ProcessorAsyncEnvPool 屬性) (torchrl.envs.SerialEnv 屬性) (torchrl.envs.ThreadingAsyncEnvPool 屬性) (torchrl.envs.TicTacToeEnv 屬性) full_reward_spec (torchrl.data.Tree 屬性) (torchrl.envs.AsyncEnvPool 屬性) (torchrl.envs.ChessEnv 屬性) (torchrl.envs.EnvBase 屬性) (torchrl.envs.GymLikeEnv 屬性) (torchrl.envs.llm.ChatEnv 屬性) (torchrl.envs.llm.DatasetChatEnv 屬性) (torchrl.envs.llm.GSM8KEnv 屬性) (torchrl.envs.llm.IFEvalEnv 屬性) (torchrl.envs.llm.LLMEnv 屬性) (torchrl.envs.llm.LLMHashingEnv 屬性) (torchrl.envs.llm.MLGymWrapper 屬性) (torchrl.envs.LLMHashingEnv 屬性) (torchrl.envs.ParallelEnv 屬性) (torchrl.envs.PendulumEnv 屬性) (torchrl.envs.ProcessorAsyncEnvPool 屬性) (torchrl.envs.SerialEnv 屬性) (torchrl.envs.ThreadingAsyncEnvPool 屬性) (torchrl.envs.TicTacToeEnv 屬性) full_reward_spec_unbatched (torchrl.envs.AsyncEnvPool 屬性) (torchrl.envs.ChessEnv 屬性) (torchrl.envs.EnvBase 屬性) (torchrl.envs.GymLikeEnv 屬性) (torchrl.envs.llm.ChatEnv 屬性) (torchrl.envs.llm.DatasetChatEnv 屬性) (torchrl.envs.llm.GSM8KEnv 屬性) (torchrl.envs.llm.IFEvalEnv 屬性) (torchrl.envs.llm.LLMEnv 屬性) (torchrl.envs.llm.LLMHashingEnv 屬性) (torchrl.envs.llm.MLGymWrapper 屬性) (torchrl.envs.LLMHashingEnv 屬性) (torchrl.envs.ParallelEnv 屬性) (torchrl.envs.PendulumEnv 屬性) (torchrl.envs.ProcessorAsyncEnvPool 屬性) (torchrl.envs.SerialEnv 屬性) (torchrl.envs.ThreadingAsyncEnvPool 屬性) (torchrl.envs.TicTacToeEnv 屬性) full_state_spec (torchrl.data.Tree 屬性) (torchrl.envs.AsyncEnvPool 屬性) (torchrl.envs.ChessEnv 屬性) (torchrl.envs.EnvBase 屬性) (torchrl.envs.GymLikeEnv 屬性) (torchrl.envs.llm.ChatEnv 屬性) (torchrl.envs.llm.DatasetChatEnv 屬性) (torchrl.envs.llm.GSM8KEnv 屬性) (torchrl.envs.llm.IFEvalEnv 屬性) (torchrl.envs.llm.LLMEnv 屬性) (torchrl.envs.llm.LLMHashingEnv 屬性) (torchrl.envs.llm.MLGymWrapper 屬性) (torchrl.envs.LLMHashingEnv 屬性) (torchrl.envs.ParallelEnv 屬性) (torchrl.envs.PendulumEnv 屬性) (torchrl.envs.ProcessorAsyncEnvPool 屬性) (torchrl.envs.SerialEnv 屬性) (torchrl.envs.ThreadingAsyncEnvPool 屬性) (torchrl.envs.TicTacToeEnv 屬性) full_state_spec_unbatched (torchrl.envs.AsyncEnvPool 屬性) (torchrl.envs.ChessEnv 屬性) (torchrl.envs.EnvBase 屬性) (torchrl.envs.GymLikeEnv 屬性) (torchrl.envs.llm.ChatEnv 屬性) (torchrl.envs.llm.DatasetChatEnv 屬性) (torchrl.envs.llm.GSM8KEnv 屬性) (torchrl.envs.llm.IFEvalEnv 屬性) (torchrl.envs.llm.LLMEnv 屬性) (torchrl.envs.llm.LLMHashingEnv 屬性) (torchrl.envs.llm.MLGymWrapper 屬性) (torchrl.envs.LLMHashingEnv 屬性) (torchrl.envs.ParallelEnv 屬性) (torchrl.envs.PendulumEnv 屬性) (torchrl.envs.ProcessorAsyncEnvPool 屬性) (torchrl.envs.SerialEnv 屬性) (torchrl.envs.ThreadingAsyncEnvPool 屬性) (torchrl.envs.TicTacToeEnv 屬性) fully_expanded() (torchrl.data.Tree 方法) functional (torchrl.objectives.A2CLoss 屬性) (torchrl.objectives.llm.GRPOLoss 屬性) (torchrl.objectives.llm.SFTLoss 屬性) (torchrl.objectives.LossModule 屬性) (torchrl.objectives.PPOLoss 屬性) (torchrl.objectives.ReinforceLoss 屬性) G GAE (torchrl.objectives.value 中的類) GAILLoss (torchrl.objectives 中的類) gen_params() (torchrl.envs.PendulumEnv 靜態方法) GenDGRLExperienceReplay (torchrl.data.datasets 中的類) generalized_advantage_estimate (torchrl.objectives.value.functional 中的類) generate (torchrl.modules.llm.RemoteTransformersWrapper 屬性) generate() (torchrl.data.RolloutFromModel 方法) (torchrl.modules.llm.AsyncVLLM 方法) generate_exp_name() (在 torchrl.record.loggers 模組中) get() (torchrl.data.Composite 方法) (torchrl.data.CompositeSpec 方法) (torchrl.data.LazyStackedCompositeSpec 方法) (torchrl.data.llm.ContentBase 方法) (torchrl.data.llm.History 方法) (torchrl.data.PairwiseDataset 方法) (torchrl.data.PromptData 方法) (torchrl.data.RewardData 方法) (torchrl.data.StackedComposite 方法) (torchrl.data.Tree 方法) (torchrl.envs.llm.IFEvalScoreData 方法) (torchrl.modules.llm.ChatHistory 方法) (torchrl.modules.llm.LogProbs 方法) (torchrl.modules.llm.Masks 方法) (torchrl.modules.llm.Text 方法) (torchrl.modules.llm.Tokens 方法) (torchrl.objectives.llm.GRPOLossOutput 方法) (torchrl.objectives.llm.SFTLossOutput 方法) get_available_libraries() (在 torchrl.envs 模組中) get_batching_state() (torchrl.modules.llm.LLMWrapperBase 方法) (torchrl.modules.llm.RemoteTransformersWrapper 方法) (torchrl.modules.llm.TransformersWrapper 方法) (torchrl.modules.llm.vLLMWrapper 方法) get_buffer() (torchrl.data.BinaryToDecimal 方法) (torchrl.data.DensifyReward 方法) (torchrl.data.HashToInt 方法) (torchrl.data.llm.TopKRewardSelector 方法) (torchrl.data.MultiStep 方法) (torchrl.data.QueryModule 方法) (torchrl.data.RandomProjectionHash 方法) (torchrl.data.SipHash 方法) (torchrl.data.TensorDictMap 方法) (torchrl.envs.AsyncEnvPool 方法) (torchrl.envs.ChessEnv 方法) (torchrl.envs.EnvBase 方法) (torchrl.envs.GymLikeEnv 方法) (torchrl.envs.llm.ChatEnv 方法) (torchrl.envs.llm.DatasetChatEnv 方法) (torchrl.envs.llm.GSM8KEnv 方法) (torchrl.envs.llm.GSM8KPrepareQuestion 方法) (torchrl.envs.llm.GSM8KRewardParser 方法) (torchrl.envs.llm.IFEvalEnv 方法) (torchrl.envs.llm.IfEvalScorer 方法) (torchrl.envs.llm.LLMEnv 方法) (torchrl.envs.llm.LLMHashingEnv 方法) (torchrl.envs.llm.MLGymWrapper 方法) (torchrl.envs.llm.transforms.AddThinkingPrompt 方法) (torchrl.envs.llm.transforms.BrowserTransform 方法) (torchrl.envs.llm.transforms.DataLoadingPrimer 方法) (torchrl.envs.llm.transforms.KLComputation 方法) (torchrl.envs.llm.transforms.KLRewardTransform 方法) (torchrl.envs.llm.transforms.MCPToolTransform 方法) (torchrl.envs.llm.transforms.PolicyVersion 方法) (torchrl.envs.llm.transforms.PythonInterpreter 方法) (torchrl.envs.llm.transforms.RayDataLoadingPrimer 方法) (torchrl.envs.llm.transforms.RetrieveKL 方法) (torchrl.envs.llm.transforms.RetrieveLogProb 方法) (torchrl.envs.llm.transforms.TemplateTransform 方法) (torchrl.envs.llm.transforms.Tokenizer 方法) (torchrl.envs.LLMHashingEnv 方法) (torchrl.envs.ParallelEnv 方法) (torchrl.envs.PendulumEnv 方法) (torchrl.envs.ProcessorAsyncEnvPool 方法) (torchrl.envs.SerialEnv 方法) (torchrl.envs.ThreadingAsyncEnvPool 方法) (torchrl.envs.TicTacToeEnv 方法) (torchrl.envs.transforms.rb_transforms.MultiStepTransform 方法) (torchrl.modules.llm.LLMWrapperBase 方法) (torchrl.modules.llm.TransformersWrapper 方法) (torchrl.modules.llm.vLLMWrapper 方法) (torchrl.objectives.llm.GRPOLoss 方法) (torchrl.objectives.llm.MCAdvantage 方法) (torchrl.objectives.llm.SFTLoss 方法) get_cache_usage() (torchrl.modules.llm.AsyncVLLM 方法) get_class_that_defined_method() (torchrl.implement_for 靜態方法) get_critic_operator() (torchrl.modules.tensordict_module.ActorCriticOperator 方法) get_dataloader (torchrl.data 中的類) get_dist() (torchrl.modules.llm.LLMWrapperBase 方法) (torchrl.modules.llm.RemoteTransformersWrapper 方法) (torchrl.modules.llm.TransformersWrapper 方法) (torchrl.modules.llm.vLLMWrapper 方法) get_dist_with_prompt_mask() (torchrl.modules.llm.RemoteTransformersWrapper 方法) (torchrl.modules.llm.vLLMWrapper 方法) get_extra_state() (torchrl.data.BinaryToDecimal 方法) (torchrl.data.DensifyReward 方法) (torchrl.data.HashToInt 方法) (torchrl.data.llm.TopKRewardSelector 方法) (torchrl.data.MultiStep 方法) (torchrl.data.QueryModule 方法) (torchrl.data.RandomProjectionHash 方法) (torchrl.data.SipHash 方法) (torchrl.data.TensorDictMap 方法) (torchrl.envs.AsyncEnvPool 方法) (torchrl.envs.ChessEnv 方法) (torchrl.envs.EnvBase 方法) (torchrl.envs.GymLikeEnv 方法) (torchrl.envs.llm.ChatEnv 方法) (torchrl.envs.llm.DatasetChatEnv 方法) (torchrl.envs.llm.GSM8KEnv 方法) (torchrl.envs.llm.GSM8KPrepareQuestion 方法) (torchrl.envs.llm.GSM8KRewardParser 方法) (torchrl.envs.llm.IFEvalEnv 方法) (torchrl.envs.llm.IfEvalScorer 方法) (torchrl.envs.llm.LLMEnv 方法) (torchrl.envs.llm.LLMHashingEnv 方法) (torchrl.envs.llm.MLGymWrapper 方法) (torchrl.envs.llm.transforms.AddThinkingPrompt 方法) (torchrl.envs.llm.transforms.BrowserTransform 方法) (torchrl.envs.llm.transforms.DataLoadingPrimer 方法) (torchrl.envs.llm.transforms.KLComputation 方法) (torchrl.envs.llm.transforms.KLRewardTransform 方法) (torchrl.envs.llm.transforms.MCPToolTransform 方法) (torchrl.envs.llm.transforms.PolicyVersion 方法) (torchrl.envs.llm.transforms.PythonInterpreter 方法) (torchrl.envs.llm.transforms.RayDataLoadingPrimer 方法) (torchrl.envs.llm.transforms.RetrieveKL 方法) (torchrl.envs.llm.transforms.RetrieveLogProb 方法) (torchrl.envs.llm.transforms.TemplateTransform 方法) (torchrl.envs.llm.transforms.Tokenizer 方法) (torchrl.envs.LLMHashingEnv 方法) (torchrl.envs.ParallelEnv 方法) (torchrl.envs.PendulumEnv 方法) (torchrl.envs.ProcessorAsyncEnvPool 方法) (torchrl.envs.SerialEnv 方法) (torchrl.envs.ThreadingAsyncEnvPool 方法) (torchrl.envs.TicTacToeEnv 方法) (torchrl.envs.transforms.rb_transforms.MultiStepTransform 方法) (torchrl.envs.transforms.VecNorm 方法) (torchrl.envs.transforms.VecNormV2 方法), [1] (torchrl.modules.llm.LLMWrapperBase 方法) (torchrl.modules.llm.TransformersWrapper 方法) (torchrl.modules.llm.vLLMWrapper 方法) (torchrl.objectives.llm.GRPOLoss 方法) (torchrl.objectives.llm.MCAdvantage 方法) (torchrl.objectives.llm.SFTLoss 方法) get_input_from_hash() (torchrl.envs.transforms.Hash 方法) get_insert_index() (torchrl.data.replay_buffers.TensorDictMaxValueWriter 方法) get_keys_from_env() (torchrl.data.MCTSForest 方法) get_legal_moves() (torchrl.envs.ChessEnv 方法) get_library_name() (torchrl.envs.llm.MLGymWrapper 靜態方法) get_logger() (在 torchrl.record.loggers 模組中) get_master_address() (torchrl.modules.llm.AsyncVLLM 方法) get_master_port() (torchrl.modules.llm.AsyncVLLM 方法) get_mode() (torchrl.modules.TanhNormal 方法) get_model_metadata() (torchrl.collectors.llm.vLLMUpdater 類方法) (torchrl.collectors.llm.vLLMUpdaterV2 類方法) (torchrl.modules.llm.AsyncVLLM 方法) get_new_version() (torchrl.modules.llm.LLMWrapperBase 方法) (torchrl.modules.llm.RemoteTransformersWrapper 方法) (torchrl.modules.llm.TransformersWrapper 方法) (torchrl.modules.llm.vLLMWrapper 方法) get_num_unfinished_requests() (torchrl.modules.llm.AsyncVLLM 方法) get_parameter() (torchrl.data.BinaryToDecimal 方法) (torchrl.data.DensifyReward 方法) (torchrl.data.HashToInt 方法) (torchrl.data.llm.TopKRewardSelector 方法) (torchrl.data.MultiStep 方法) (torchrl.data.QueryModule 方法) (torchrl.data.RandomProjectionHash 方法) (torchrl.data.SipHash 方法) (torchrl.data.TensorDictMap 方法) (torchrl.envs.AsyncEnvPool 方法) (torchrl.envs.ChessEnv 方法) (torchrl.envs.EnvBase 方法) (torchrl.envs.GymLikeEnv 方法) (torchrl.envs.llm.ChatEnv 方法) (torchrl.envs.llm.DatasetChatEnv 方法) (torchrl.envs.llm.GSM8KEnv 方法) (torchrl.envs.llm.GSM8KPrepareQuestion 方法) (torchrl.envs.llm.GSM8KRewardParser 方法) (torchrl.envs.llm.IFEvalEnv 方法) (torchrl.envs.llm.IfEvalScorer 方法) (torchrl.envs.llm.LLMEnv 方法) (torchrl.envs.llm.LLMHashingEnv 方法) (torchrl.envs.llm.MLGymWrapper 方法) (torchrl.envs.llm.transforms.AddThinkingPrompt 方法) (torchrl.envs.llm.transforms.BrowserTransform 方法) (torchrl.envs.llm.transforms.DataLoadingPrimer 方法) (torchrl.envs.llm.transforms.KLComputation 方法) (torchrl.envs.llm.transforms.KLRewardTransform 方法) (torchrl.envs.llm.transforms.MCPToolTransform 方法) (torchrl.envs.llm.transforms.PolicyVersion 方法) (torchrl.envs.llm.transforms.PythonInterpreter 方法) (torchrl.envs.llm.transforms.RayDataLoadingPrimer 方法) (torchrl.envs.llm.transforms.RetrieveKL 方法) (torchrl.envs.llm.transforms.RetrieveLogProb 方法) (torchrl.envs.llm.transforms.TemplateTransform 方法) (torchrl.envs.llm.transforms.Tokenizer 方法) (torchrl.envs.LLMHashingEnv 方法) (torchrl.envs.ParallelEnv 方法) (torchrl.envs.PendulumEnv 方法) (torchrl.envs.ProcessorAsyncEnvPool 方法) (torchrl.envs.SerialEnv 方法) (torchrl.envs.ThreadingAsyncEnvPool 方法) (torchrl.envs.TicTacToeEnv 方法) (torchrl.envs.transforms.rb_transforms.MultiStepTransform 方法) (torchrl.modules.llm.LLMWrapperBase 方法) (torchrl.modules.llm.TransformersWrapper 方法) (torchrl.modules.llm.vLLMWrapper 方法) (torchrl.objectives.llm.GRPOLoss 方法) (torchrl.objectives.llm.MCAdvantage 方法) (torchrl.objectives.llm.SFTLoss 方法) get_policy_head() (torchrl.modules.tensordict_module.ActorCriticOperator 方法) (torchrl.modules.tensordict_module.ActorCriticWrapper 方法) (torchrl.modules.tensordict_module.ActorValueOperator 方法) get_policy_model() (torchrl.collectors.llm.LLMCollector 方法) (torchrl.collectors.llm.RayLLMCollector 方法) get_policy_operator() (torchrl.modules.tensordict_module.ActorCriticWrapper 方法) (torchrl.modules.tensordict_module.ActorValueOperator 方法) get_policy_version() (torchrl.collectors.llm.LLMCollector 方法) (torchrl.collectors.llm.RayLLMCollector 方法) get_primers_from_module (torchrl.modules.utils 中的類) get_random_actor_index() (torchrl.modules.llm.AsyncVLLM 方法) get_reward_operator() (torchrl.modules.tensordict_module.WorldModelWrapper 方法) get_stateful_net() (torchrl.modules.MultiAgentNetBase 方法) (torchrl.objectives.llm.GRPOLoss 方法) (torchrl.objectives.llm.SFTLoss 方法) (torchrl.objectives.LossModule 方法) get_stats_random_rollout() (在 torchrl.trainers.helpers 模組中) get_submodule() (torchrl.data.BinaryToDecimal 方法) (torchrl.data.DensifyReward 方法) (torchrl.data.HashToInt 方法) (torchrl.data.llm.TopKRewardSelector 方法) (torchrl.data.MultiStep 方法) (torchrl.data.QueryModule 方法) (torchrl.data.RandomProjectionHash 方法) (torchrl.data.SipHash 方法) (torchrl.data.TensorDictMap 方法) (torchrl.envs.AsyncEnvPool 方法) (torchrl.envs.ChessEnv 方法) (torchrl.envs.EnvBase 方法) (torchrl.envs.GymLikeEnv 方法) (torchrl.envs.llm.ChatEnv 方法) (torchrl.envs.llm.DatasetChatEnv 方法) (torchrl.envs.llm.GSM8KEnv 方法) (torchrl.envs.llm.GSM8KPrepareQuestion 方法) (torchrl.envs.llm.GSM8KRewardParser 方法) (torchrl.envs.llm.IFEvalEnv 方法) (torchrl.envs.llm.IfEvalScorer 方法) (torchrl.envs.llm.LLMEnv 方法) (torchrl.envs.llm.LLMHashingEnv 方法) (torchrl.envs.llm.MLGymWrapper 方法) (torchrl.envs.llm.transforms.AddThinkingPrompt 方法) (torchrl.envs.llm.transforms.BrowserTransform 方法) (torchrl.envs.llm.transforms.DataLoadingPrimer 方法) (torchrl.envs.llm.transforms.KLComputation 方法) (torchrl.envs.llm.transforms.KLRewardTransform 方法) (torchrl.envs.llm.transforms.MCPToolTransform 方法) (torchrl.envs.llm.transforms.PolicyVersion 方法) (torchrl.envs.llm.transforms.PythonInterpreter 方法) (torchrl.envs.llm.transforms.RayDataLoadingPrimer 方法) (torchrl.envs.llm.transforms.RetrieveKL 方法) (torchrl.envs.llm.transforms.RetrieveLogProb 方法) (torchrl.envs.llm.transforms.TemplateTransform 方法) (torchrl.envs.llm.transforms.Tokenizer 方法) (torchrl.envs.LLMHashingEnv 方法) (torchrl.envs.ParallelEnv 方法) (torchrl.envs.PendulumEnv 方法) (torchrl.envs.ProcessorAsyncEnvPool 方法) (torchrl.envs.SerialEnv 方法) (torchrl.envs.ThreadingAsyncEnvPool 方法) (torchrl.envs.TicTacToeEnv 方法) (torchrl.envs.transforms.rb_transforms.MultiStepTransform 方法) (torchrl.modules.llm.LLMWrapperBase 方法) (torchrl.modules.llm.TransformersWrapper 方法) (torchrl.modules.llm.vLLMWrapper 方法) (torchrl.objectives.llm.GRPOLoss 方法) (torchrl.objectives.llm.MCAdvantage 方法) (torchrl.objectives.llm.SFTLoss 方法) get_tp_size() (torchrl.collectors.llm.vLLMUpdaterV2 方法) (torchrl.modules.llm.AsyncVLLM 方法) get_transition_model_operator() (torchrl.modules.tensordict_module.WorldModelWrapper 方法) get_value_head() (torchrl.modules.tensordict_module.ActorCriticOperator 方法) (torchrl.modules.tensordict_module.ActorCriticWrapper 方法) (torchrl.modules.tensordict_module.ActorValueOperator 方法) get_value_operator() (torchrl.modules.tensordict_module.ActorCriticOperator 方法) (torchrl.modules.tensordict_module.ActorCriticWrapper 方法) (torchrl.modules.tensordict_module.ActorValueOperator 方法) get_vertex_by_hash() (torchrl.data.Tree 方法) get_vertex_by_id() (torchrl.data.Tree 方法) GrayScale (torchrl.envs.transforms 中的類) GrayScaleConfig (torchrl.trainers.algorithms.configs.transforms 中的類) group_optimizers (torchrl.objectives 中的類) GRPOLoss (torchrl.objectives.llm 中的類) GRPOLossOutput (torchrl.objectives.llm 中的類) GRU (torchrl.modules 中的類) GRUCell (torchrl.modules 中的類) GRUModule (torchrl.modules 中的類) gSDENoise (torchrl.envs.transforms 中的類) GSM8KEnv (torchrl.envs.llm 中的類) GSM8KPrepareQuestion (torchrl.envs.llm 中的類) GSM8KRewardParser (torchrl.envs.llm 中的類) gym_backend() (在 torchrl.envs 模組中) GymEnv() (在 torchrl.envs 模組中) GymEnvConfig (torchrl.trainers.algorithms.configs.envs_libs 中的類) GymLikeEnv (torchrl.envs 中的類) GymWrapper() (在 torchrl.envs 模組中) H H5Combine (torchrl.data 中的類) H5Split (torchrl.data 中的類) H5StorageCheckpointer (torchrl.data.replay_buffers 中的類) HabitatEnv() (在 torchrl.envs 模組中) HabitatEnvConfig (torchrl.trainers.algorithms.configs.envs_libs 中的類) half() (torchrl.data.BinaryToDecimal 方法) (torchrl.data.DensifyReward 方法) (torchrl.data.HashToInt 方法) (torchrl.data.llm.TopKRewardSelector 方法) (torchrl.data.MultiStep 方法) (torchrl.data.QueryModule 方法) (torchrl.data.RandomProjectionHash 方法) (torchrl.data.SipHash 方法) (torchrl.data.TensorDictMap 方法) (torchrl.envs.AsyncEnvPool 方法) (torchrl.envs.ChessEnv 方法) (torchrl.envs.EnvBase 方法) (torchrl.envs.GymLikeEnv 方法) (torchrl.envs.llm.ChatEnv 方法) (torchrl.envs.llm.DatasetChatEnv 方法) (torchrl.envs.llm.GSM8KEnv 方法) (torchrl.envs.llm.GSM8KPrepareQuestion 方法) (torchrl.envs.llm.GSM8KRewardParser 方法) (torchrl.envs.llm.IFEvalEnv 方法) (torchrl.envs.llm.IfEvalScorer 方法) (torchrl.envs.llm.LLMEnv 方法) (torchrl.envs.llm.LLMHashingEnv 方法) (torchrl.envs.llm.MLGymWrapper 方法) (torchrl.envs.llm.transforms.AddThinkingPrompt 方法) (torchrl.envs.llm.transforms.BrowserTransform 方法) (torchrl.envs.llm.transforms.DataLoadingPrimer 方法) (torchrl.envs.llm.transforms.KLComputation 方法) (torchrl.envs.llm.transforms.KLRewardTransform 方法) (torchrl.envs.llm.transforms.MCPToolTransform 方法) (torchrl.envs.llm.transforms.PolicyVersion 方法) (torchrl.envs.llm.transforms.PythonInterpreter 方法) (torchrl.envs.llm.transforms.RayDataLoadingPrimer 方法) (torchrl.envs.llm.transforms.RetrieveKL 方法) (torchrl.envs.llm.transforms.RetrieveLogProb 方法) (torchrl.envs.llm.transforms.TemplateTransform 方法) (torchrl.envs.llm.transforms.Tokenizer 方法) (torchrl.envs.LLMHashingEnv 方法) (torchrl.envs.ParallelEnv 方法) (torchrl.envs.PendulumEnv 方法) (torchrl.envs.ProcessorAsyncEnvPool 方法) (torchrl.envs.SerialEnv 方法) (torchrl.envs.ThreadingAsyncEnvPool 方法) (torchrl.envs.TicTacToeEnv 方法) (torchrl.envs.transforms.rb_transforms.MultiStepTransform 方法) (torchrl.modules.llm.LLMWrapperBase 方法) (torchrl.modules.llm.TransformersWrapper 方法) (torchrl.modules.llm.vLLMWrapper 方法) (torchrl.objectives.llm.GRPOLoss 方法) (torchrl.objectives.llm.MCAdvantage 方法) (torchrl.objectives.llm.SFTLoss 方法) HardUpdate (torchrl.objectives 中的類) Hash (torchrl.envs.transforms 中的類) HashConfig (torchrl.trainers.algorithms.configs.transforms 中的類) HashToInt (torchrl.data 中的類) History (torchrl.data.llm 中的類) hold_out_net (torchrl.objectives 中的類) hold_out_params (torchrl.objectives 中的類) I IFEvalEnv (torchrl.envs.llm 中的類) IFEvalScoreData (torchrl.envs.llm 中的類) IfEvalScorer (torchrl.envs.llm 中的類) ImmutableDatasetWriter (torchrl.data.replay_buffers 中的類) implement_for (torchrl 中的類) implements_for_spec() (torchrl.data.Binary 類方法) (torchrl.data.BinaryDiscreteTensorSpec 類方法) (torchrl.data.Bounded 類方法) (torchrl.data.BoundedTensorSpec 類方法) (torchrl.data.Categorical 類方法) (torchrl.data.Composite 類方法) (torchrl.data.CompositeSpec 類方法) (torchrl.data.DiscreteTensorSpec 類方法) (torchrl.data.LazyStackedCompositeSpec 類方法) (torchrl.data.LazyStackedTensorSpec 類方法) (torchrl.data.MultiCategorical 類方法) (torchrl.data.MultiDiscreteTensorSpec 類方法) (torchrl.data.MultiOneHot 類方法) (torchrl.data.MultiOneHotDiscreteTensorSpec 類方法) (torchrl.data.NonTensor 類方法) (torchrl.data.NonTensorSpec 類方法) (torchrl.data.OneHot 類方法) (torchrl.data.OneHotDiscreteTensorSpec 類方法) (torchrl.data.Stacked 類方法) (torchrl.data.StackedComposite 類方法) (torchrl.data.TensorSpec 類方法) (torchrl.data.Unbounded 類方法) (torchrl.data.UnboundedContinuous 類方法) (torchrl.data.UnboundedContinuousTensorSpec 類方法) (torchrl.data.UnboundedDiscrete 類方法) (torchrl.data.UnboundedDiscreteTensorSpec 類方法) import_module() (torchrl.implement_for 類方法) in_keys (torchrl.envs.llm.transforms.RayDataLoadingPrimer 屬性) (torchrl.modules.llm.RemoteTransformersWrapper 屬性) in_keys_inv (torchrl.envs.llm.transforms.RayDataLoadingPrimer 屬性) increment_version() (torchrl.collectors.distributed.DistributedWeightUpdater 方法) (torchrl.collectors.distributed.RPCWeightUpdater 方法) (torchrl.collectors.llm.LLMCollector 方法) (torchrl.collectors.llm.RayLLMCollector 方法) (torchrl.collectors.llm.vLLMUpdater 方法) (torchrl.collectors.llm.vLLMUpdaterV2 方法) (torchrl.collectors.MultiProcessedWeightUpdater 方法) (torchrl.collectors.RayWeightUpdater 方法) (torchrl.collectors.VanillaWeightUpdater 方法) (torchrl.collectors.WeightUpdaterBase 方法) (torchrl.envs.llm.transforms.PolicyVersion 方法) IndependentNormal (torchrl.modules 中的類) index() (torchrl.data.Binary 方法) (torchrl.data.BinaryDiscreteTensorSpec 方法) (torchrl.data.Bounded 方法) (torchrl.data.BoundedTensorSpec 方法) (torchrl.data.Categorical 方法) (torchrl.data.Composite 方法) (torchrl.data.CompositeSpec 方法) (torchrl.data.DiscreteTensorSpec 方法) (torchrl.data.LazyStackedCompositeSpec 方法) (torchrl.data.LazyStackedTensorSpec 方法) (torchrl.data.MultiCategorical 方法) (torchrl.data.MultiDiscreteTensorSpec 方法) (torchrl.data.MultiOneHot 方法) (torchrl.data.MultiOneHotDiscreteTensorSpec 方法) (torchrl.data.NonTensor 方法) (torchrl.data.NonTensorSpec 方法) (torchrl.data.OneHot 方法) (torchrl.data.OneHotDiscreteTensorSpec 方法) (torchrl.data.Stacked 方法) (torchrl.data.StackedComposite 方法) (torchrl.data.TensorSpec 方法) (torchrl.data.Unbounded 方法) (torchrl.data.UnboundedContinuous 方法) (torchrl.data.UnboundedContinuousTensorSpec 方法) (torchrl.data.UnboundedDiscrete 方法) (torchrl.data.UnboundedDiscreteTensorSpec 方法) init() (torchrl.collectors.distributed.DistributedWeightUpdater 方法) (torchrl.collectors.distributed.RPCWeightUpdater 方法) (torchrl.collectors.llm.vLLMUpdater 方法), [1] (torchrl.collectors.llm.vLLMUpdaterV2 方法) (torchrl.collectors.MultiProcessedWeightUpdater 方法) (torchrl.collectors.RayWeightUpdater 方法) (torchrl.collectors.VanillaWeightUpdater 方法) (torchrl.collectors.WeightUpdaterBase 方法) (torchrl.data.llm.TopKRewardSelector 方法) (torchrl.envs.llm.GSM8KPrepareQuestion 方法) (torchrl.envs.llm.GSM8KRewardParser 方法) (torchrl.envs.llm.IfEvalScorer 方法) (torchrl.envs.llm.transforms.AddThinkingPrompt 方法) (torchrl.envs.llm.transforms.BrowserTransform 方法) (torchrl.envs.llm.transforms.DataLoadingPrimer 方法) (torchrl.envs.llm.transforms.KLComputation 方法) (torchrl.envs.llm.transforms.KLRewardTransform 方法) (torchrl.envs.llm.transforms.MCPToolTransform 方法) (torchrl.envs.llm.transforms.PolicyVersion 方法) (torchrl.envs.llm.transforms.PythonInterpreter 方法) (torchrl.envs.llm.transforms.RayDataLoadingPrimer 方法) (torchrl.envs.llm.transforms.RetrieveKL 方法) (torchrl.envs.llm.transforms.RetrieveLogProb 方法) (torchrl.envs.llm.transforms.TemplateTransform 方法) (torchrl.envs.llm.transforms.Tokenizer 方法) (torchrl.envs.transforms.Compose 方法) (torchrl.envs.transforms.rb_transforms.MultiStepTransform 方法) (torchrl.envs.transforms.Transform 方法) (torchrl.objectives.llm.MCAdvantage 方法) init_key (torchrl.modules.tensordict_module.MultiStepActorWrapper 屬性) init_stats() (torchrl.envs.transforms.ObservationNorm 方法) init_updater() (torchrl.collectors.aSyncDataCollector 方法) (torchrl.collectors.DataCollectorBase 方法) (torchrl.collectors.distributed.DistributedDataCollector 方法) (torchrl.collectors.distributed.DistributedSyncDataCollector 方法) (torchrl.collectors.distributed.RayCollector 方法) (torchrl.collectors.distributed.RPCDataCollector 方法) (torchrl.collectors.llm.LLMCollector 方法) (torchrl.collectors.llm.RayLLMCollector 方法) (torchrl.collectors.MultiaSyncDataCollector 方法) (torchrl.collectors.MultiSyncDataCollector 方法) (torchrl.collectors.SyncDataCollector 方法) init_weight_update_group() (torchrl.modules.llm.AsyncVLLM 方法) initialize_parameters() (torchrl.modules.NoisyLazyLinear 方法) InitTracker (torchrl.envs.transforms 中的類) InitTrackerConfig (torchrl.trainers.algorithms.configs.transforms 中的類) inplace (torchrl.modules.llm.RemoteTransformersWrapper 屬性) input_spec (torchrl.envs.AsyncEnvPool 屬性) (torchrl.envs.ChessEnv 屬性) (torchrl.envs.EnvBase 屬性) (torchrl.envs.GymLikeEnv 屬性) (torchrl.envs.llm.ChatEnv 屬性) (torchrl.envs.llm.DatasetChatEnv 屬性) (torchrl.envs.llm.GSM8KEnv 屬性) (torchrl.envs.llm.IFEvalEnv 屬性) (torchrl.envs.llm.LLMEnv 屬性) (torchrl.envs.llm.LLMHashingEnv 屬性) (torchrl.envs.llm.MLGymWrapper 屬性) (torchrl.envs.LLMHashingEnv 屬性) (torchrl.envs.ParallelEnv 屬性) (torchrl.envs.PendulumEnv 屬性) (torchrl.envs.ProcessorAsyncEnvPool 屬性) (torchrl.envs.SerialEnv 屬性) (torchrl.envs.ThreadingAsyncEnvPool 屬性) (torchrl.envs.TicTacToeEnv 屬性) (torchrl.envs.transforms.TransformedEnv 屬性) input_spec_unbatched (torchrl.envs.AsyncEnvPool 屬性) (torchrl.envs.ChessEnv 屬性) (torchrl.envs.EnvBase 屬性) (torchrl.envs.GymLikeEnv 屬性) (torchrl.envs.llm.ChatEnv 屬性) (torchrl.envs.llm.DatasetChatEnv 屬性) (torchrl.envs.llm.GSM8KEnv 屬性) (torchrl.envs.llm.IFEvalEnv 屬性) (torchrl.envs.llm.LLMEnv 屬性) (torchrl.envs.llm.LLMHashingEnv 屬性) (torchrl.envs.llm.MLGymWrapper 屬性) (torchrl.envs.LLMHashingEnv 屬性) (torchrl.envs.ParallelEnv 屬性) (torchrl.envs.PendulumEnv 屬性) (torchrl.envs.ProcessorAsyncEnvPool 屬性) (torchrl.envs.SerialEnv 屬性) (torchrl.envs.ThreadingAsyncEnvPool 屬性) (torchrl.envs.TicTacToeEnv 屬性) insert() (torchrl.envs.llm.transforms.RetrieveKL 方法) (torchrl.envs.transforms.Compose 方法) insert_transform() (torchrl.data.datasets.AtariDQNExperienceReplay 方法) (torchrl.data.datasets.BaseDatasetExperienceReplay 方法) (torchrl.data.datasets.D4RLExperienceReplay 方法) (torchrl.data.datasets.GenDGRLExperienceReplay 方法) (torchrl.data.datasets.MinariExperienceReplay 方法) (torchrl.data.datasets.OpenMLExperienceReplay 方法) (torchrl.data.datasets.OpenXExperienceReplay 方法) (torchrl.data.datasets.RobosetExperienceReplay 方法) (torchrl.data.datasets.VD4RLExperienceReplay 方法) (torchrl.data.PrioritizedReplayBuffer 方法) (torchrl.data.RayReplayBuffer 方法) (torchrl.data.RemoteTensorDictReplayBuffer 方法) (torchrl.data.replay_buffers.ReplayBufferEnsemble 方法) (torchrl.data.ReplayBuffer 方法) (torchrl.data.TensorDictPrioritizedReplayBuffer 方法) (torchrl.data.TensorDictReplayBuffer 方法) (torchrl.envs.llm.DatasetChatEnv 方法) (torchrl.envs.llm.GSM8KEnv 方法) (torchrl.envs.llm.IFEvalEnv 方法) (torchrl.envs.transforms.AutoResetEnv 方法) (torchrl.envs.transforms.TransformedEnv 方法) inv() (torchrl.data.llm.TopKRewardSelector 方法) (torchrl.envs.llm.GSM8KPrepareQuestion 方法) (torchrl.envs.llm.GSM8KRewardParser 方法) (torchrl.envs.llm.IfEvalScorer 方法) (torchrl.envs.llm.transforms.AddThinkingPrompt 方法) (torchrl.envs.llm.transforms.BrowserTransform 方法) (torchrl.envs.llm.transforms.DataLoadingPrimer 方法) (torchrl.envs.llm.transforms.KLComputation 方法) (torchrl.envs.llm.transforms.KLRewardTransform 方法) (torchrl.envs.llm.transforms.MCPToolTransform 方法) (torchrl.envs.llm.transforms.PolicyVersion 方法) (torchrl.envs.llm.transforms.PythonInterpreter 方法) (torchrl.envs.llm.transforms.RayDataLoadingPrimer 方法) (torchrl.envs.llm.transforms.RetrieveKL 方法) (torchrl.envs.llm.transforms.RetrieveLogProb 方法) (torchrl.envs.llm.transforms.TemplateTransform 方法) (torchrl.envs.llm.transforms.Tokenizer 方法) (torchrl.envs.transforms.ActionDiscretizer 方法) (torchrl.envs.transforms.rb_transforms.MultiStepTransform 方法) (torchrl.envs.transforms.Transform 方法) (torchrl.objectives.llm.MCAdvantage 方法) inv_softplus (torchrl.modules.utils 中的類) ipu() (torchrl.data.BinaryToDecimal 方法) (torchrl.data.DensifyReward 方法) (torchrl.data.HashToInt 方法) (torchrl.data.llm.TopKRewardSelector 方法) (torchrl.data.MultiStep 方法) (torchrl.data.QueryModule 方法) (torchrl.data.RandomProjectionHash 方法) (torchrl.data.SipHash 方法) (torchrl.data.TensorDictMap 方法) (torchrl.envs.AsyncEnvPool 方法) (torchrl.envs.ChessEnv 方法) (torchrl.envs.EnvBase 方法) (torchrl.envs.GymLikeEnv 方法) (torchrl.envs.llm.ChatEnv 方法) (torchrl.envs.llm.DatasetChatEnv 方法) (torchrl.envs.llm.GSM8KEnv 方法) (torchrl.envs.llm.GSM8KPrepareQuestion 方法) (torchrl.envs.llm.GSM8KRewardParser 方法) (torchrl.envs.llm.IFEvalEnv 方法) (torchrl.envs.llm.IfEvalScorer 方法) (torchrl.envs.llm.LLMEnv 方法) (torchrl.envs.llm.LLMHashingEnv 方法) (torchrl.envs.llm.MLGymWrapper 方法) (torchrl.envs.llm.transforms.AddThinkingPrompt 方法) (torchrl.envs.llm.transforms.BrowserTransform 方法) (torchrl.envs.llm.transforms.DataLoadingPrimer 方法) (torchrl.envs.llm.transforms.KLComputation 方法) (torchrl.envs.llm.transforms.KLRewardTransform 方法) (torchrl.envs.llm.transforms.MCPToolTransform 方法) (torchrl.envs.llm.transforms.PolicyVersion 方法) (torchrl.envs.llm.transforms.PythonInterpreter 方法) (torchrl.envs.llm.transforms.RayDataLoadingPrimer 方法) (torchrl.envs.llm.transforms.RetrieveKL 方法) (torchrl.envs.llm.transforms.RetrieveLogProb 方法) (torchrl.envs.llm.transforms.TemplateTransform 方法) (torchrl.envs.llm.transforms.Tokenizer 方法) (torchrl.envs.LLMHashingEnv 方法) (torchrl.envs.ParallelEnv 方法) (torchrl.envs.PendulumEnv 方法) (torchrl.envs.ProcessorAsyncEnvPool 方法) (torchrl.envs.SerialEnv 方法) (torchrl.envs.ThreadingAsyncEnvPool 方法) (torchrl.envs.TicTacToeEnv 方法) (torchrl.envs.transforms.rb_transforms.MultiStepTransform 方法) (torchrl.modules.llm.LLMWrapperBase 方法) (torchrl.modules.llm.TransformersWrapper 方法) (torchrl.modules.llm.vLLMWrapper 方法) (torchrl.objectives.llm.GRPOLoss 方法) (torchrl.objectives.llm.MCAdvantage 方法) (torchrl.objectives.llm.SFTLoss 方法) IQLLoss (torchrl.objectives 中的類) is_empty() (torchrl.data.Composite 方法) (torchrl.data.CompositeSpec 方法) (torchrl.data.LazyStackedCompositeSpec 方法) (torchrl.data.StackedComposite 方法) is_in() (torchrl.data.Binary 方法) (torchrl.data.BinaryDiscreteTensorSpec 方法) (torchrl.data.Bounded 方法) (torchrl.data.BoundedTensorSpec 方法) (torchrl.data.Categorical 方法) (torchrl.data.Composite 方法) (torchrl.data.CompositeSpec 方法) (torchrl.data.DiscreteTensorSpec 方法) (torchrl.data.LazyStackedCompositeSpec 方法) (torchrl.data.LazyStackedTensorSpec 方法) (torchrl.data.MultiCategorical 方法) (torchrl.data.MultiDiscreteTensorSpec 方法) (torchrl.data.MultiOneHot 方法) (torchrl.data.MultiOneHotDiscreteTensorSpec 方法) (torchrl.data.NonTensor 方法) (torchrl.data.NonTensorSpec 方法) (torchrl.data.OneHot 方法) (torchrl.data.OneHotDiscreteTensorSpec 方法) (torchrl.data.Stacked 方法) (torchrl.data.StackedComposite 方法) (torchrl.data.TensorSpec 方法) (torchrl.data.Unbounded 方法) (torchrl.data.UnboundedContinuous 方法) (torchrl.data.UnboundedContinuousTensorSpec 方法) (torchrl.data.UnboundedDiscrete 方法) (torchrl.data.UnboundedDiscreteTensorSpec 方法) is_initialized() (torchrl.collectors.llm.LLMCollector 方法) (torchrl.collectors.llm.RayLLMCollector 方法) is_spec_locked (torchrl.envs.AsyncEnvPool 屬性) (torchrl.envs.ChessEnv 屬性) (torchrl.envs.EnvBase 屬性) (torchrl.envs.GymLikeEnv 屬性) (torchrl.envs.llm.ChatEnv 屬性) (torchrl.envs.llm.DatasetChatEnv 屬性) (torchrl.envs.llm.GSM8KEnv 屬性) (torchrl.envs.llm.IFEvalEnv 屬性) (torchrl.envs.llm.LLMEnv 屬性) (torchrl.envs.llm.LLMHashingEnv 屬性) (torchrl.envs.llm.MLGymWrapper 屬性) (torchrl.envs.LLMHashingEnv 屬性) (torchrl.envs.ParallelEnv 屬性) (torchrl.envs.PendulumEnv 屬性) (torchrl.envs.ProcessorAsyncEnvPool 屬性) (torchrl.envs.SerialEnv 屬性) (torchrl.envs.ThreadingAsyncEnvPool 屬性) (torchrl.envs.TicTacToeEnv 屬性) is_tdmodule_compatible() (torchrl.data.DensifyReward 靜態方法) (torchrl.data.QueryModule 靜態方法) (torchrl.data.TensorDictMap 靜態方法) (torchrl.modules.llm.LLMWrapperBase 靜態方法) (torchrl.modules.llm.TransformersWrapper 靜態方法) (torchrl.modules.llm.vLLMWrapper 靜態方法) (torchrl.objectives.llm.GRPOLoss 靜態方法) (torchrl.objectives.llm.SFTLoss 靜態方法) is_terminal (torchrl.data.Tree 屬性) IsaacGymEnv() (在 torchrl.envs 模組中) IsaacGymEnvConfig (torchrl.trainers.algorithms.configs.envs_libs 中的類) IsaacGymWrapper() (在 torchrl.envs 模組中) IsaacLabWrapper() (在 torchrl.envs 模組中) items() (torchrl.data.Composite 方法) (torchrl.data.CompositeSpec 方法) (torchrl.data.LazyStackedCompositeSpec 方法) (torchrl.data.StackedComposite 方法) iterator() (torchrl.collectors.llm.LLMCollector 方法) (torchrl.collectors.llm.RayLLMCollector 方法) (torchrl.collectors.SyncDataCollector 方法) J JumanjiEnv() (在 torchrl.envs 模組中) JumanjiEnvConfig (torchrl.trainers.algorithms.configs.envs_libs 中的類) JumanjiWrapper() (在 torchrl.envs 模組中) K keys() (torchrl.data.Composite 方法) (torchrl.data.CompositeSpec 方法) (torchrl.data.LazyStackedCompositeSpec 方法) (torchrl.data.StackedComposite 方法) KLComputation (torchrl.envs.llm.transforms 中的類) KLPENPPOLoss (torchrl.objectives 中的類) KLRewardTransform (torchrl.envs.llm.transforms 中的類) (torchrl.envs.transforms 中的類) KLRewardTransformConfig (torchrl.trainers.algorithms.configs.transforms 中的類) L launch() (torchrl.modules.llm.AsyncVLLM 類方法) layout (torchrl.modules.llm.RemoteTransformersWrapper 屬性) lazy_stack() (torchrl.data.llm.ContentBase 方法) (torchrl.data.llm.History 方法) (torchrl.data.Tree 方法) (torchrl.envs.llm.IFEvalScoreData 方法) (torchrl.modules.llm.ChatHistory 方法) (torchrl.modules.llm.LogProbs 方法) (torchrl.modules.llm.Masks 方法) (torchrl.modules.llm.Text 方法) (torchrl.modules.llm.Tokens 方法) (torchrl.objectives.llm.GRPOLossOutput 方法) (torchrl.objectives.llm.SFTLossOutput 方法) LazyMemmapStorage (torchrl.data.replay_buffers 中的類) LazyMemmapStorageConfig (torchrl.trainers.algorithms.configs.data 中的類) LazyStackedCompositeSpec (torchrl.data 中的類) LazyStackedTensorSpec (torchrl.data 中的類) LazyStackStorage (torchrl.data.replay_buffers 中的類) LazyStackStorageConfig (torchrl.trainers.algorithms.configs.data 中的類) LazyTensorStorage (torchrl.data.replay_buffers 中的類) LazyTensorStorageConfig (torchrl.trainers.algorithms.configs.data 中的類) LBFGSConfig (torchrl.trainers.algorithms.configs.utils 中的類) LineariseRewards (torchrl.envs.transforms 中的類) LineariseRewardsConfig (torchrl.trainers.algorithms.configs.transforms 中的類) LionConfig (torchrl.trainers.algorithms.configs.utils 中的類) ListStorage (torchrl.data.replay_buffers 中的類) ListStorageCheckpointer (torchrl.data.replay_buffers 中的類) ListStorageConfig (torchrl.trainers.algorithms.configs.data 中的類) LLMCollector (torchrl.collectors.llm 中的類) LLMEnv (torchrl.envs.llm 中的類) LLMHashingEnv (torchrl.envs 中的類) (torchrl.envs.llm 中的類) LLMMaskedCategorical (torchrl.modules 中的類) LLMWrapperBase (torchrl.modules.llm 中的類) LMHeadActorValueOperator (torchrl.modules.tensordict_module 中的類) load() (torchrl.data.datasets.AtariDQNExperienceReplay 方法) (torchrl.data.datasets.BaseDatasetExperienceReplay 方法) (torchrl.data.datasets.D4RLExperienceReplay 方法) (torchrl.data.datasets.GenDGRLExperienceReplay 方法) (torchrl.data.datasets.MinariExperienceReplay 方法) (torchrl.data.datasets.OpenMLExperienceReplay 方法) (torchrl.data.datasets.OpenXExperienceReplay 方法) (torchrl.data.datasets.RobosetExperienceReplay 方法) (torchrl.data.datasets.VD4RLExperienceReplay 方法) (torchrl.data.llm.ContentBase 方法) (torchrl.data.llm.History 方法) (torchrl.data.PairwiseDataset 類方法) (torchrl.data.PrioritizedReplayBuffer 方法) (torchrl.data.PromptData 類方法) (torchrl.data.RayReplayBuffer 方法) (torchrl.data.RemoteTensorDictReplayBuffer 方法) (torchrl.data.replay_buffers.CompressedListStorage 方法) (torchrl.data.replay_buffers.LazyMemmapStorage 方法) (torchrl.data.replay_buffers.LazyStackStorage 方法) (torchrl.data.replay_buffers.LazyTensorStorage 方法) (torchrl.data.replay_buffers.ListStorage 方法) (torchrl.data.replay_buffers.ReplayBufferEnsemble 方法) (torchrl.data.replay_buffers.Storage 方法) (torchrl.data.replay_buffers.StorageEnsemble 方法) (torchrl.data.replay_buffers.TensorStorage 方法) (torchrl.data.ReplayBuffer 方法) (torchrl.data.RewardData 類方法) (torchrl.data.TensorDictPrioritizedReplayBuffer 方法) (torchrl.data.TensorDictReplayBuffer 方法) (torchrl.data.TokenizedDatasetLoader 方法) (torchrl.data.Tree 方法) (torchrl.envs.llm.IFEvalScoreData 方法) (torchrl.modules.llm.ChatHistory 方法) (torchrl.modules.llm.LogProbs 方法) (torchrl.modules.llm.Masks 方法) (torchrl.modules.llm.Text 方法) (torchrl.modules.llm.Tokens 方法) (torchrl.objectives.llm.GRPOLossOutput 方法) (torchrl.objectives.llm.SFTLossOutput 方法) load_() (torchrl.data.llm.ContentBase 方法) (torchrl.data.llm.History 方法) (torchrl.data.PairwiseDataset 方法) (torchrl.data.PromptData 方法) (torchrl.data.RewardData 方法) (torchrl.data.Tree 方法) (torchrl.envs.llm.IFEvalScoreData 方法) (torchrl.modules.llm.ChatHistory 方法) (torchrl.modules.llm.LogProbs 方法) (torchrl.modules.llm.Masks 方法) (torchrl.modules.llm.Text 方法) (torchrl.modules.llm.Tokens 方法) (torchrl.objectives.llm.GRPOLossOutput 方法) (torchrl.objectives.llm.SFTLossOutput 方法) load_from_file() (torchrl.trainers.algorithms.PPOTrainer 方法) (torchrl.trainers.Trainer 方法) load_memmap() (torchrl.data.llm.ContentBase 方法) (torchrl.data.llm.History 方法) (torchrl.data.PairwiseDataset 類方法) (torchrl.data.PromptData 類方法) (torchrl.data.RewardData 類方法) (torchrl.data.Tree 方法) (torchrl.envs.llm.IFEvalScoreData 方法) (torchrl.modules.llm.ChatHistory 方法) (torchrl.modules.llm.LogProbs 方法) (torchrl.modules.llm.Masks 方法) (torchrl.modules.llm.Text 方法) (torchrl.modules.llm.Tokens 方法) (torchrl.objectives.llm.GRPOLossOutput 方法) (torchrl.objectives.llm.SFTLossOutput 方法) load_state_dict() (torchrl.collectors.aSyncDataCollector 方法) (torchrl.collectors.distributed.RayCollector 方法) (torchrl.collectors.llm.LLMCollector 方法) (torchrl.collectors.llm.RayLLMCollector 方法) (torchrl.collectors.MultiaSyncDataCollector 方法) (torchrl.collectors.MultiSyncDataCollector 方法) (torchrl.collectors.SyncDataCollector 方法) (torchrl.data.BinaryToDecimal 方法) (torchrl.data.DensifyReward 方法) (torchrl.data.HashToInt 方法) (torchrl.data.llm.ContentBase 方法) (torchrl.data.llm.History 方法) (torchrl.data.llm.TopKRewardSelector 方法) (torchrl.data.MultiStep 方法) (torchrl.data.PairwiseDataset 方法) (torchrl.data.PromptData 方法) (torchrl.data.QueryModule 方法) (torchrl.data.RandomProjectionHash 方法) (torchrl.data.replay_buffers.CompressedListStorage 方法) (torchrl.data.RewardData 方法) (torchrl.data.SipHash 方法) (torchrl.data.TensorDictMap 方法) (torchrl.data.Tree 方法) (torchrl.envs.AsyncEnvPool 方法) (torchrl.envs.ChessEnv 方法) (torchrl.envs.EnvBase 方法) (torchrl.envs.GymLikeEnv 方法) (torchrl.envs.llm.ChatEnv 方法) (torchrl.envs.llm.DatasetChatEnv 方法) (torchrl.envs.llm.GSM8KEnv 方法) (torchrl.envs.llm.GSM8KPrepareQuestion 方法) (torchrl.envs.llm.GSM8KRewardParser 方法) (torchrl.envs.llm.IFEvalEnv 方法) (torchrl.envs.llm.IFEvalScoreData 方法) (torchrl.envs.llm.IfEvalScorer 方法) (torchrl.envs.llm.LLMEnv 方法) (torchrl.envs.llm.LLMHashingEnv 方法) (torchrl.envs.llm.MLGymWrapper 方法) (torchrl.envs.llm.transforms.AddThinkingPrompt 方法) (torchrl.envs.llm.transforms.BrowserTransform 方法) (torchrl.envs.llm.transforms.DataLoadingPrimer 方法) (torchrl.envs.llm.transforms.KLComputation 方法) (torchrl.envs.llm.transforms.KLRewardTransform 方法) (torchrl.envs.llm.transforms.MCPToolTransform 方法) (torchrl.envs.llm.transforms.PolicyVersion 方法) (torchrl.envs.llm.transforms.PythonInterpreter 方法) (torchrl.envs.llm.transforms.RayDataLoadingPrimer 方法) (torchrl.envs.llm.transforms.RetrieveKL 方法) (torchrl.envs.llm.transforms.RetrieveLogProb 方法) (torchrl.envs.llm.transforms.TemplateTransform 方法) (torchrl.envs.llm.transforms.Tokenizer 方法) (torchrl.envs.LLMHashingEnv 方法) (torchrl.envs.ParallelEnv 方法) (torchrl.envs.PendulumEnv 方法) (torchrl.envs.ProcessorAsyncEnvPool 方法) (torchrl.envs.SerialEnv 方法) (torchrl.envs.ThreadingAsyncEnvPool 方法) (torchrl.envs.TicTacToeEnv 方法) (torchrl.envs.transforms.rb_transforms.MultiStepTransform 方法) (torchrl.envs.transforms.TrajCounter 方法) (torchrl.envs.transforms.TransformedEnv 方法) (torchrl.modules.llm.ChatHistory 方法) (torchrl.modules.llm.LLMWrapperBase 方法) (torchrl.modules.llm.LogProbs 方法) (torchrl.modules.llm.Masks 方法) (torchrl.modules.llm.Text 方法) (torchrl.modules.llm.Tokens 方法) (torchrl.modules.llm.TransformersWrapper 方法) (torchrl.modules.llm.vLLMWrapper 方法) (torchrl.objectives.CrossQLoss 方法) (torchrl.objectives.llm.GRPOLoss 方法) (torchrl.objectives.llm.GRPOLossOutput 方法) (torchrl.objectives.llm.MCAdvantage 方法) (torchrl.objectives.llm.SFTLoss 方法) (torchrl.objectives.llm.SFTLossOutput 方法) (torchrl.objectives.SACLoss 方法) loads() (torchrl.data.datasets.AtariDQNExperienceReplay 方法) (torchrl.data.datasets.BaseDatasetExperienceReplay 方法) (torchrl.data.datasets.D4RLExperienceReplay 方法) (torchrl.data.datasets.GenDGRLExperienceReplay 方法) (torchrl.data.datasets.MinariExperienceReplay 方法) (torchrl.data.datasets.OpenMLExperienceReplay 方法) (torchrl.data.datasets.OpenXExperienceReplay 方法) (torchrl.data.datasets.RobosetExperienceReplay 方法) (torchrl.data.datasets.VD4RLExperienceReplay 方法) (torchrl.data.PrioritizedReplayBuffer 方法) (torchrl.data.RayReplayBuffer 方法) (torchrl.data.RemoteTensorDictReplayBuffer 方法) (torchrl.data.replay_buffers.CompressedListStorageCheckpointer 方法) (torchrl.data.replay_buffers.ReplayBufferEnsemble 方法) (torchrl.data.ReplayBuffer 方法) (torchrl.data.TensorDictPrioritizedReplayBuffer 方法) (torchrl.data.TensorDictReplayBuffer 方法) loc (torchrl.envs.transforms.VecNorm 屬性) (torchrl.envs.transforms.VecNormV2 屬性) loc() (torchrl.envs.transforms.VecNormV2 方法) local_policy() (torchrl.collectors.distributed.RayCollector 方法) lock_() (torchrl.data.Composite 方法) (torchrl.data.CompositeSpec 方法) (torchrl.data.LazyStackedCompositeSpec 方法) (torchrl.data.StackedComposite 方法) log_prob() (torchrl.modules.Delta 方法) (torchrl.modules.llm.RemoteTransformersWrapper 方法) (torchrl.modules.LLMMaskedCategorical 方法) (torchrl.modules.MaskedCategorical 方法) (torchrl.modules.MaskedOneHotCategorical 方法) (torchrl.modules.OneHotCategorical 方法) (torchrl.modules.TruncatedNormal 方法) log_prob_keys (torchrl.modules.llm.RemoteTransformersWrapper 屬性) log_probs_key (torchrl.modules.llm.RemoteTransformersWrapper 屬性) Logger() (在 torchrl.record.loggers 模組中) LoggerConfig (torchrl.trainers.algorithms.configs.logging 中的類) logits (torchrl.modules.LLMMaskedCategorical 屬性) LogProbs (torchrl.modules.llm 中的類) logprobs_of_labels() (torchrl.data.RolloutFromModel 靜態方法) LogScalar (torchrl.trainers 中的類) LogValidationReward (torchrl.trainers 中的類) loss_critic() (torchrl.objectives.A2CLoss 方法) (torchrl.objectives.llm.GRPOLoss 方法) (torchrl.objectives.PPOLoss 方法) loss_value_diff() (torchrl.objectives.IQLLoss 靜態方法) LossConfig (torchrl.trainers.algorithms.configs.objectives 中的類) LossModule (torchrl.objectives 中的類) LSTM (torchrl.modules 中的類) LSTMCell (torchrl.modules 中的類) LSTMModule (torchrl.modules 中的類) M make_async_vllm_engine (torchrl.modules.llm 中的類) make_collector_offpolicy() (在 torchrl.trainers.helpers 模組中) make_collector_onpolicy() (在 torchrl.trainers.helpers 模組中) make_composite_from_td() (在 torchrl.envs 模組中) make_cudnn_based() (torchrl.modules.GRUModule 方法) (torchrl.modules.LSTMModule 方法) make_dqn_loss() (在 torchrl.trainers.helpers 模組中) make_gsm8k_env (torchrl.envs.llm 中的類) make_mlgym (torchrl.envs.llm 中的類) make_neg_dim() (torchrl.data.Binary 方法) (torchrl.data.BinaryDiscreteTensorSpec 方法) (torchrl.data.Bounded 方法) (torchrl.data.BoundedTensorSpec 方法) (torchrl.data.Categorical 方法) (torchrl.data.Composite 方法) (torchrl.data.CompositeSpec 方法) (torchrl.data.DiscreteTensorSpec 方法) (torchrl.data.LazyStackedCompositeSpec 方法) (torchrl.data.LazyStackedTensorSpec 方法) (torchrl.data.MultiCategorical 方法) (torchrl.data.MultiDiscreteTensorSpec 方法) (torchrl.data.MultiOneHot 方法) (torchrl.data.MultiOneHotDiscreteTensorSpec 方法) (torchrl.data.NonTensor 方法) (torchrl.data.NonTensorSpec 方法) (torchrl.data.OneHot 方法) (torchrl.data.OneHotDiscreteTensorSpec 方法) (torchrl.data.Stacked 方法) (torchrl.data.StackedComposite 方法) (torchrl.data.TensorSpec 方法) (torchrl.data.Unbounded 方法) (torchrl.data.UnboundedContinuous 方法) (torchrl.data.UnboundedContinuousTensorSpec 方法) (torchrl.data.UnboundedDiscrete 方法) (torchrl.data.UnboundedDiscreteTensorSpec 方法) make_node() (torchrl.data.Tree 類方法) make_noload_model() (torchrl.envs.transforms.VC1Transform 類方法) make_python_based() (torchrl.modules.GRUModule 方法) (torchrl.modules.LSTMModule 方法) make_rb_transform_and_sampler() (torchrl.envs.transforms.CatFrames 方法) make_replay_buffer() (在 torchrl.trainers.helpers 模組中) make_target_updater() (在 torchrl.trainers.helpers 模組中) make_tensordict() (torchrl.envs.llm.LLMHashingEnv 方法) (torchrl.envs.LLMHashingEnv 方法) make_tensordict_primer() (torchrl.modules.ConsistentDropoutModule 方法) (torchrl.modules.GRUModule 方法), [1] (torchrl.modules.LSTMModule 方法), [1] make_trainer() (在 torchrl.trainers.helpers 模組中) make_value_estimator() (torchrl.objectives.A2CLoss 方法) (torchrl.objectives.CQLLoss 方法) (torchrl.objectives.CrossQLoss 方法) (torchrl.objectives.DDPGLoss 方法) (torchrl.objectives.DiscreteCQLLoss 方法) (torchrl.objectives.DiscreteSACLoss 方法) (torchrl.objectives.DistributionalDQNLoss 方法) (torchrl.objectives.DQNLoss 方法) (torchrl.objectives.DreamerActorLoss 方法) (torchrl.objectives.IQLLoss 方法) (torchrl.objectives.llm.GRPOLoss 方法) (torchrl.objectives.llm.SFTLoss 方法) (torchrl.objectives.LossModule 方法) (torchrl.objectives.multiagent.QMixerLoss 方法) (torchrl.objectives.PPOLoss 方法) (torchrl.objectives.REDQLoss 方法) (torchrl.objectives.ReinforceLoss 方法) (torchrl.objectives.SACLoss 方法) (torchrl.objectives.TD3BCLoss 方法) (torchrl.objectives.TD3Loss 方法) make_variant() (torchrl.envs.EnvCreator 方法) make_vllm_worker (torchrl.modules.llm 中的類) mappings (torchrl.modules.utils 中的類) MarlGroupMapType() (在 torchrl.envs 模組中) mask (torchrl.modules.LLMMaskedCategorical 屬性) mask_context() (torchrl.modules.tensordict_module.DecisionTransformerInferenceWrapper 方法) masked_dist (torchrl.modules.LLMMaskedCategorical 屬性) masked_logits (torchrl.modules.LLMMaskedCategorical 屬性) MaskedCategorical (torchrl.modules 中的類) MaskedOneHotCategorical (torchrl.modules 中的類) Masks (torchrl.modules.llm 中的類) masks_key (torchrl.modules.llm.RemoteTransformersWrapper 屬性) max_length() (torchrl.data.Tree 方法) maybe_dense_stack() (torchrl.data.llm.ContentBase 方法) (torchrl.data.llm.History 方法) (torchrl.data.Tree 方法) (torchrl.envs.llm.IFEvalScoreData 方法) (torchrl.modules.llm.ChatHistory 方法) (torchrl.modules.llm.LogProbs 方法) (torchrl.modules.llm.Masks 方法) (torchrl.modules.llm.Text 方法) (torchrl.modules.llm.Tokens 方法) (torchrl.objectives.llm.GRPOLossOutput 方法) (torchrl.objectives.llm.SFTLossOutput 方法) maybe_init_target_entropy() (torchrl.objectives.CrossQLoss 方法) maybe_reset() (torchrl.envs.AsyncEnvPool 方法) (torchrl.envs.ChessEnv 方法) (torchrl.envs.EnvBase 方法) (torchrl.envs.GymLikeEnv 方法) (torchrl.envs.llm.ChatEnv 方法) (torchrl.envs.llm.DatasetChatEnv 方法) (torchrl.envs.llm.GSM8KEnv 方法) (torchrl.envs.llm.IFEvalEnv 方法) (torchrl.envs.llm.LLMEnv 方法) (torchrl.envs.llm.LLMHashingEnv 方法) (torchrl.envs.llm.MLGymWrapper 方法) (torchrl.envs.LLMHashingEnv 方法) (torchrl.envs.ParallelEnv 方法) (torchrl.envs.PendulumEnv 方法) (torchrl.envs.ProcessorAsyncEnvPool 方法) (torchrl.envs.SerialEnv 方法) (torchrl.envs.ThreadingAsyncEnvPool 方法) (torchrl.envs.TicTacToeEnv 方法) MCAdvantage (torchrl.objectives.llm 中的類) MCPToolTransform (torchrl.envs.llm.transforms 中的類) MCTSForest (torchrl.data 中的類) mean (torchrl.modules.Delta 屬性) (torchrl.modules.TanhDelta 屬性) (torchrl.modules.TanhNormal 屬性) MeltingpotEnv() (在 torchrl.envs 模組中) MeltingpotEnvConfig (torchrl.trainers.algorithms.configs.envs_libs 中的類) MeltingpotWrapper() (在 torchrl.envs 模組中) memmap() (torchrl.data.llm.ContentBase 方法) (torchrl.data.llm.History 方法) (torchrl.data.PairwiseDataset 方法) (torchrl.data.PromptData 方法) (torchrl.data.RewardData 方法) (torchrl.data.Tree 方法) (torchrl.envs.llm.IFEvalScoreData 方法) (torchrl.modules.llm.ChatHistory 方法) (torchrl.modules.llm.LogProbs 方法) (torchrl.modules.llm.Masks 方法) (torchrl.modules.llm.Text 方法) (torchrl.modules.llm.Tokens 方法) (torchrl.objectives.llm.GRPOLossOutput 方法) (torchrl.objectives.llm.SFTLossOutput 方法) memmap_() (torchrl.data.llm.ContentBase 方法) (torchrl.data.llm.History 方法) (torchrl.data.PairwiseDataset 方法) (torchrl.data.PromptData 方法) (torchrl.data.RewardData 方法) (torchrl.data.Tree 方法) (torchrl.envs.llm.IFEvalScoreData 方法) (torchrl.modules.llm.ChatHistory 方法) (torchrl.modules.llm.LogProbs 方法) (torchrl.modules.llm.Masks 方法) (torchrl.modules.llm.Text 方法) (torchrl.modules.llm.Tokens 方法) (torchrl.objectives.llm.GRPOLossOutput 方法) (torchrl.objectives.llm.SFTLossOutput 方法) memmap_like() (torchrl.data.llm.ContentBase 方法) (torchrl.data.llm.History 方法) (torchrl.data.PairwiseDataset 方法) (torchrl.data.PromptData 方法) (torchrl.data.RewardData 方法) (torchrl.data.Tree 方法) (torchrl.envs.llm.IFEvalScoreData 方法) (torchrl.modules.llm.ChatHistory 方法) (torchrl.modules.llm.LogProbs 方法) (torchrl.modules.llm.Masks 方法) (torchrl.modules.llm.Text 方法) (torchrl.modules.llm.Tokens 方法) (torchrl.objectives.llm.GRPOLossOutput 方法) (torchrl.objectives.llm.SFTLossOutput 方法) memmap_refresh_() (torchrl.data.llm.ContentBase 方法) (torchrl.data.llm.History 方法) (torchrl.data.PairwiseDataset 方法) (torchrl.data.PromptData 方法) (torchrl.data.RewardData 方法) (torchrl.data.Tree 方法) (torchrl.envs.llm.IFEvalScoreData 方法) (torchrl.modules.llm.ChatHistory 方法) (torchrl.modules.llm.LogProbs 方法) (torchrl.modules.llm.Masks 方法) (torchrl.modules.llm.Text 方法) (torchrl.modules.llm.Tokens 方法) (torchrl.objectives.llm.GRPOLossOutput 方法) (torchrl.objectives.llm.SFTLossOutput 方法) memoize_encode() (torchrl.data.Binary 方法) (torchrl.data.BinaryDiscreteTensorSpec 方法) (torchrl.data.Bounded 方法) (torchrl.data.BoundedTensorSpec 方法) (torchrl.data.Categorical 方法) (torchrl.data.Composite 方法) (torchrl.data.CompositeSpec 方法) (torchrl.data.DiscreteTensorSpec 方法) (torchrl.data.LazyStackedCompositeSpec 方法) (torchrl.data.LazyStackedTensorSpec 方法) (torchrl.data.MultiCategorical 方法) (torchrl.data.MultiDiscreteTensorSpec 方法) (torchrl.data.MultiOneHot 方法) (torchrl.data.MultiOneHotDiscreteTensorSpec 方法) (torchrl.data.NonTensor 方法) (torchrl.data.NonTensorSpec 方法) (torchrl.data.OneHot 方法) (torchrl.data.OneHotDiscreteTensorSpec 方法) (torchrl.data.Stacked 方法) (torchrl.data.StackedComposite 方法) (torchrl.data.TensorSpec 方法) (torchrl.data.Unbounded 方法) (torchrl.data.UnboundedContinuous 方法) (torchrl.data.UnboundedContinuousTensorSpec 方法) (torchrl.data.UnboundedDiscrete 方法) (torchrl.data.UnboundedDiscreteTensorSpec 方法) MinariExperienceReplay (torchrl.data.datasets 中的類) missing_tolerance (torchrl.envs.llm.transforms.RayDataLoadingPrimer 屬性) mix() (torchrl.modules.QMixer 方法) (torchrl.modules.VDNMixer 方法) MLFlowLogger() (在 torchrl.record.loggers.mlflow 模組中) MLGymWrapper (torchrl.envs.llm 中的類) MLP (torchrl.modules 中的類) MLPConfig (torchrl.trainers.algorithms.configs.modules 中的類) mode (torchrl.modules.Delta 屬性) (torchrl.modules.IndependentNormal 屬性) (torchrl.modules.LLMMaskedCategorical 屬性) (torchrl.modules.MaskedOneHotCategorical 屬性) (torchrl.modules.OneHotCategorical 屬性) (torchrl.modules.TanhDelta 屬性) (torchrl.modules.TanhNormal 屬性) (torchrl.modules.TruncatedNormal 屬性) ModelBasedEnvBase() (在 torchrl.envs 模組中) ModelConfig (torchrl.trainers.algorithms.configs.modules 中的類) module_set() (torchrl.implement_for 方法) modules() (torchrl.data.BinaryToDecimal 方法) (torchrl.data.DensifyReward 方法) (torchrl.data.HashToInt 方法) (torchrl.data.llm.TopKRewardSelector 方法) (torchrl.data.MultiStep 方法) (torchrl.data.QueryModule 方法) (torchrl.data.RandomProjectionHash 方法) (torchrl.data.SipHash 方法) (torchrl.data.TensorDictMap 方法) (torchrl.envs.AsyncEnvPool 方法) (torchrl.envs.ChessEnv 方法) (torchrl.envs.EnvBase 方法) (torchrl.envs.GymLikeEnv 方法) (torchrl.envs.llm.ChatEnv 方法) (torchrl.envs.llm.DatasetChatEnv 方法) (torchrl.envs.llm.GSM8KEnv 方法) (torchrl.envs.llm.GSM8KPrepareQuestion 方法) (torchrl.envs.llm.GSM8KRewardParser 方法) (torchrl.envs.llm.IFEvalEnv 方法) (torchrl.envs.llm.IfEvalScorer 方法) (torchrl.envs.llm.LLMEnv 方法) (torchrl.envs.llm.LLMHashingEnv 方法) (torchrl.envs.llm.MLGymWrapper 方法) (torchrl.envs.llm.transforms.AddThinkingPrompt 方法) (torchrl.envs.llm.transforms.BrowserTransform 方法) (torchrl.envs.llm.transforms.DataLoadingPrimer 方法) (torchrl.envs.llm.transforms.KLComputation 方法) (torchrl.envs.llm.transforms.KLRewardTransform 方法) (torchrl.envs.llm.transforms.MCPToolTransform 方法) (torchrl.envs.llm.transforms.PolicyVersion 方法) (torchrl.envs.llm.transforms.PythonInterpreter 方法) (torchrl.envs.llm.transforms.RayDataLoadingPrimer 方法) (torchrl.envs.llm.transforms.RetrieveKL 方法) (torchrl.envs.llm.transforms.RetrieveLogProb 方法) (torchrl.envs.llm.transforms.TemplateTransform 方法) (torchrl.envs.llm.transforms.Tokenizer 方法) (torchrl.envs.LLMHashingEnv 方法) (torchrl.envs.ParallelEnv 方法) (torchrl.envs.PendulumEnv 方法) (torchrl.envs.ProcessorAsyncEnvPool 方法) (torchrl.envs.SerialEnv 方法) (torchrl.envs.ThreadingAsyncEnvPool 方法) (torchrl.envs.TicTacToeEnv 方法) (torchrl.envs.transforms.rb_transforms.MultiStepTransform 方法) (torchrl.modules.llm.LLMWrapperBase 方法) (torchrl.modules.llm.TransformersWrapper 方法) (torchrl.modules.llm.vLLMWrapper 方法) (torchrl.objectives.llm.GRPOLoss 方法) (torchrl.objectives.llm.MCAdvantage 方法) (torchrl.objectives.llm.SFTLoss 方法) MOGymEnv() (在 torchrl.envs 模組中) MOGymEnvConfig (torchrl.trainers.algorithms.configs.envs_libs 中的類) MOGymWrapper() (在 torchrl.envs 模組中) MPCPlannerBase (torchrl.modules 中的類) MPPIPlanner (torchrl.modules 中的類) mtia() (torchrl.data.BinaryToDecimal 方法) (torchrl.data.DensifyReward 方法) (torchrl.data.HashToInt 方法) (torchrl.data.llm.TopKRewardSelector 方法) (torchrl.data.MultiStep 方法) (torchrl.data.QueryModule 方法) (torchrl.data.RandomProjectionHash 方法) (torchrl.data.SipHash 方法) (torchrl.data.TensorDictMap 方法) (torchrl.envs.AsyncEnvPool 方法) (torchrl.envs.ChessEnv 方法) (torchrl.envs.EnvBase 方法) (torchrl.envs.GymLikeEnv 方法) (torchrl.envs.llm.ChatEnv 方法) (torchrl.envs.llm.DatasetChatEnv 方法) (torchrl.envs.llm.GSM8KEnv 方法) (torchrl.envs.llm.GSM8KPrepareQuestion 方法) (torchrl.envs.llm.GSM8KRewardParser 方法) (torchrl.envs.llm.IFEvalEnv 方法) (torchrl.envs.llm.IfEvalScorer 方法) (torchrl.envs.llm.LLMEnv 方法) (torchrl.envs.llm.LLMHashingEnv 方法) (torchrl.envs.llm.MLGymWrapper 方法) (torchrl.envs.llm.transforms.AddThinkingPrompt 方法) (torchrl.envs.llm.transforms.BrowserTransform 方法) (torchrl.envs.llm.transforms.DataLoadingPrimer 方法) (torchrl.envs.llm.transforms.KLComputation 方法) (torchrl.envs.llm.transforms.KLRewardTransform 方法) (torchrl.envs.llm.transforms.MCPToolTransform 方法) (torchrl.envs.llm.transforms.PolicyVersion 方法) (torchrl.envs.llm.transforms.PythonInterpreter 方法) (torchrl.envs.llm.transforms.RayDataLoadingPrimer 方法) (torchrl.envs.llm.transforms.RetrieveKL 方法) (torchrl.envs.llm.transforms.RetrieveLogProb 方法) (torchrl.envs.llm.transforms.TemplateTransform 方法) (torchrl.envs.llm.transforms.Tokenizer 方法) (torchrl.envs.LLMHashingEnv 方法) (torchrl.envs.ParallelEnv 方法) (torchrl.envs.PendulumEnv 方法) (torchrl.envs.ProcessorAsyncEnvPool 方法) (torchrl.envs.SerialEnv 方法) (torchrl.envs.ThreadingAsyncEnvPool 方法) (torchrl.envs.TicTacToeEnv 方法) (torchrl.envs.transforms.rb_transforms.MultiStepTransform 方法) (torchrl.modules.llm.LLMWrapperBase 方法) (torchrl.modules.llm.TransformersWrapper 方法) (torchrl.modules.llm.vLLMWrapper 方法) (torchrl.objectives.llm.GRPOLoss 方法) (torchrl.objectives.llm.MCAdvantage 方法) (torchrl.objectives.llm.SFTLoss 方法) MultiAction (torchrl.envs.transforms 中的類) MultiActionConfig (torchrl.trainers.algorithms.configs.transforms 中的類) MultiAgentConvNet (torchrl.modules 中的類) MultiAgentMLP (torchrl.modules 中的類) MultiAgentNetBase (torchrl.modules 中的類) MultiaSyncDataCollector (torchrl.collectors 中的類) MultiaSyncDataCollectorConfig (torchrl.trainers.algorithms.configs.collectors 中的類) MultiCategorical (torchrl.data 中的類) MultiDiscreteTensorSpec (torchrl.data 中的類) MultiOneHot (torchrl.data 中的類) MultiOneHotDiscreteTensorSpec (torchrl.data 中的類) MultiProcessedWeightUpdater (torchrl.collectors 中的類) MultiStep (torchrl.data 中的類) MultiStepActorWrapper (torchrl.modules.tensordict_module 中的類) MultiStepTransform (torchrl.envs.transforms.rb_transforms 中的類) MultiStepTransformConfig (torchrl.trainers.algorithms.configs.transforms 中的類) MultiSyncDataCollector (torchrl.collectors 中的類) MultiSyncDataCollectorConfig (torchrl.trainers.algorithms.configs.collectors 中的類) MultiThreadedEnv() (在 torchrl.envs 模組中) MultiThreadedEnvConfig (torchrl.trainers.algorithms.configs.envs_libs 中的類) MultiThreadedEnvWrapper() (在 torchrl.envs 模組中) N n_steps (torchrl.envs.transforms.rb_transforms.MultiStepTransform 屬性) NAdamConfig (torchrl.trainers.algorithms.configs.utils 中的類) named_buffers() (torchrl.data.BinaryToDecimal 方法) (torchrl.data.DensifyReward 方法) (torchrl.data.HashToInt 方法) (torchrl.data.llm.TopKRewardSelector 方法) (torchrl.data.MultiStep 方法) (torchrl.data.QueryModule 方法) (torchrl.data.RandomProjectionHash 方法) (torchrl.data.SipHash 方法) (torchrl.data.TensorDictMap 方法) (torchrl.envs.AsyncEnvPool 方法) (torchrl.envs.ChessEnv 方法) (torchrl.envs.EnvBase 方法) (torchrl.envs.GymLikeEnv 方法) (torchrl.envs.llm.ChatEnv 方法) (torchrl.envs.llm.DatasetChatEnv 方法) (torchrl.envs.llm.GSM8KEnv 方法) (torchrl.envs.llm.GSM8KPrepareQuestion 方法) (torchrl.envs.llm.GSM8KRewardParser 方法) (torchrl.envs.llm.IFEvalEnv 方法) (torchrl.envs.llm.IfEvalScorer 方法) (torchrl.envs.llm.LLMEnv 方法) (torchrl.envs.llm.LLMHashingEnv 方法) (torchrl.envs.llm.MLGymWrapper 方法) (torchrl.envs.llm.transforms.AddThinkingPrompt 方法) (torchrl.envs.llm.transforms.BrowserTransform 方法) (torchrl.envs.llm.transforms.DataLoadingPrimer 方法) (torchrl.envs.llm.transforms.KLComputation 方法) (torchrl.envs.llm.transforms.KLRewardTransform 方法) (torchrl.envs.llm.transforms.MCPToolTransform 方法) (torchrl.envs.llm.transforms.PolicyVersion 方法) (torchrl.envs.llm.transforms.PythonInterpreter 方法) (torchrl.envs.llm.transforms.RayDataLoadingPrimer 方法) (torchrl.envs.llm.transforms.RetrieveKL 方法) (torchrl.envs.llm.transforms.RetrieveLogProb 方法) (torchrl.envs.llm.transforms.TemplateTransform 方法) (torchrl.envs.llm.transforms.Tokenizer 方法) (torchrl.envs.LLMHashingEnv 方法) (torchrl.envs.ParallelEnv 方法) (torchrl.envs.PendulumEnv 方法) (torchrl.envs.ProcessorAsyncEnvPool 方法) (torchrl.envs.SerialEnv 方法) (torchrl.envs.ThreadingAsyncEnvPool 方法) (torchrl.envs.TicTacToeEnv 方法) (torchrl.envs.transforms.rb_transforms.MultiStepTransform 方法) (torchrl.modules.llm.LLMWrapperBase 方法) (torchrl.modules.llm.TransformersWrapper 方法) (torchrl.modules.llm.vLLMWrapper 方法) (torchrl.objectives.llm.GRPOLoss 方法) (torchrl.objectives.llm.MCAdvantage 方法) (torchrl.objectives.llm.SFTLoss 方法) named_children() (torchrl.data.BinaryToDecimal 方法) (torchrl.data.DensifyReward 方法) (torchrl.data.HashToInt 方法) (torchrl.data.llm.TopKRewardSelector 方法) (torchrl.data.MultiStep 方法) (torchrl.data.QueryModule 方法) (torchrl.data.RandomProjectionHash 方法) (torchrl.data.SipHash 方法) (torchrl.data.TensorDictMap 方法) (torchrl.envs.AsyncEnvPool 方法) (torchrl.envs.ChessEnv 方法) (torchrl.envs.EnvBase 方法) (torchrl.envs.GymLikeEnv 方法) (torchrl.envs.llm.ChatEnv 方法) (torchrl.envs.llm.DatasetChatEnv 方法) (torchrl.envs.llm.GSM8KEnv 方法) (torchrl.envs.llm.GSM8KPrepareQuestion 方法) (torchrl.envs.llm.GSM8KRewardParser 方法) (torchrl.envs.llm.IFEvalEnv 方法) (torchrl.envs.llm.IfEvalScorer 方法) (torchrl.envs.llm.LLMEnv 方法) (torchrl.envs.llm.LLMHashingEnv 方法) (torchrl.envs.llm.MLGymWrapper 方法) (torchrl.envs.llm.transforms.AddThinkingPrompt 方法) (torchrl.envs.llm.transforms.BrowserTransform 方法) (torchrl.envs.llm.transforms.DataLoadingPrimer 方法) (torchrl.envs.llm.transforms.KLComputation 方法) (torchrl.envs.llm.transforms.KLRewardTransform 方法) (torchrl.envs.llm.transforms.MCPToolTransform 方法) (torchrl.envs.llm.transforms.PolicyVersion 方法) (torchrl.envs.llm.transforms.PythonInterpreter 方法) (torchrl.envs.llm.transforms.RayDataLoadingPrimer 方法) (torchrl.envs.llm.transforms.RetrieveKL 方法) (torchrl.envs.llm.transforms.RetrieveLogProb 方法) (torchrl.envs.llm.transforms.TemplateTransform 方法) (torchrl.envs.llm.transforms.Tokenizer 方法) (torchrl.envs.LLMHashingEnv 方法) (torchrl.envs.ParallelEnv 方法) (torchrl.envs.PendulumEnv 方法) (torchrl.envs.ProcessorAsyncEnvPool 方法) (torchrl.envs.SerialEnv 方法) (torchrl.envs.ThreadingAsyncEnvPool 方法) (torchrl.envs.TicTacToeEnv 方法) (torchrl.envs.transforms.rb_transforms.MultiStepTransform 方法) (torchrl.modules.llm.LLMWrapperBase 方法) (torchrl.modules.llm.TransformersWrapper 方法) (torchrl.modules.llm.vLLMWrapper 方法) (torchrl.objectives.llm.GRPOLoss 方法) (torchrl.objectives.llm.MCAdvantage 方法) (torchrl.objectives.llm.SFTLoss 方法) named_modules() (torchrl.data.BinaryToDecimal 方法) (torchrl.data.DensifyReward 方法) (torchrl.data.HashToInt 方法) (torchrl.data.llm.TopKRewardSelector 方法) (torchrl.data.MultiStep 方法) (torchrl.data.QueryModule 方法) (torchrl.data.RandomProjectionHash 方法) (torchrl.data.SipHash 方法) (torchrl.data.TensorDictMap 方法) (torchrl.envs.AsyncEnvPool 方法) (torchrl.envs.ChessEnv 方法) (torchrl.envs.EnvBase 方法) (torchrl.envs.GymLikeEnv 方法) (torchrl.envs.llm.ChatEnv 方法) (torchrl.envs.llm.DatasetChatEnv 方法) (torchrl.envs.llm.GSM8KEnv 方法) (torchrl.envs.llm.GSM8KPrepareQuestion 方法) (torchrl.envs.llm.GSM8KRewardParser 方法) (torchrl.envs.llm.IFEvalEnv 方法) (torchrl.envs.llm.IfEvalScorer 方法) (torchrl.envs.llm.LLMEnv 方法) (torchrl.envs.llm.LLMHashingEnv 方法) (torchrl.envs.llm.MLGymWrapper 方法) (torchrl.envs.llm.transforms.AddThinkingPrompt 方法) (torchrl.envs.llm.transforms.BrowserTransform 方法) (torchrl.envs.llm.transforms.DataLoadingPrimer 方法) (torchrl.envs.llm.transforms.KLComputation 方法) (torchrl.envs.llm.transforms.KLRewardTransform 方法) (torchrl.envs.llm.transforms.MCPToolTransform 方法) (torchrl.envs.llm.transforms.PolicyVersion 方法) (torchrl.envs.llm.transforms.PythonInterpreter 方法) (torchrl.envs.llm.transforms.RayDataLoadingPrimer 方法) (torchrl.envs.llm.transforms.RetrieveKL 方法) (torchrl.envs.llm.transforms.RetrieveLogProb 方法) (torchrl.envs.llm.transforms.TemplateTransform 方法) (torchrl.envs.llm.transforms.Tokenizer 方法) (torchrl.envs.LLMHashingEnv 方法) (torchrl.envs.ParallelEnv 方法) (torchrl.envs.PendulumEnv 方法) (torchrl.envs.ProcessorAsyncEnvPool 方法) (torchrl.envs.SerialEnv 方法) (torchrl.envs.ThreadingAsyncEnvPool 方法) (torchrl.envs.TicTacToeEnv 方法) (torchrl.envs.transforms.rb_transforms.MultiStepTransform 方法) (torchrl.modules.llm.LLMWrapperBase 方法) (torchrl.modules.llm.TransformersWrapper 方法) (torchrl.modules.llm.vLLMWrapper 方法) (torchrl.objectives.llm.GRPOLoss 方法) (torchrl.objectives.llm.MCAdvantage 方法) (torchrl.objectives.llm.SFTLoss 方法) named_parameters() (torchrl.data.BinaryToDecimal 方法) (torchrl.data.DensifyReward 方法) (torchrl.data.HashToInt 方法) (torchrl.data.llm.TopKRewardSelector 方法) (torchrl.data.MultiStep 方法) (torchrl.data.QueryModule 方法) (torchrl.data.RandomProjectionHash 方法) (torchrl.data.SipHash 方法) (torchrl.data.TensorDictMap 方法) (torchrl.envs.AsyncEnvPool 方法) (torchrl.envs.ChessEnv 方法) (torchrl.envs.EnvBase 方法) (torchrl.envs.GymLikeEnv 方法) (torchrl.envs.llm.ChatEnv 方法) (torchrl.envs.llm.DatasetChatEnv 方法) (torchrl.envs.llm.GSM8KEnv 方法) (torchrl.envs.llm.GSM8KPrepareQuestion 方法) (torchrl.envs.llm.GSM8KRewardParser 方法) (torchrl.envs.llm.IFEvalEnv 方法) (torchrl.envs.llm.IfEvalScorer 方法) (torchrl.envs.llm.LLMEnv 方法) (torchrl.envs.llm.LLMHashingEnv 方法) (torchrl.envs.llm.MLGymWrapper 方法) (torchrl.envs.llm.transforms.AddThinkingPrompt 方法) (torchrl.envs.llm.transforms.BrowserTransform 方法) (torchrl.envs.llm.transforms.DataLoadingPrimer 方法) (torchrl.envs.llm.transforms.KLComputation 方法) (torchrl.envs.llm.transforms.KLRewardTransform 方法) (torchrl.envs.llm.transforms.MCPToolTransform 方法) (torchrl.envs.llm.transforms.PolicyVersion 方法) (torchrl.envs.llm.transforms.PythonInterpreter 方法) (torchrl.envs.llm.transforms.RayDataLoadingPrimer 方法) (torchrl.envs.llm.transforms.RetrieveKL 方法) (torchrl.envs.llm.transforms.RetrieveLogProb 方法) (torchrl.envs.llm.transforms.TemplateTransform 方法) (torchrl.envs.llm.transforms.Tokenizer 方法) (torchrl.envs.LLMHashingEnv 方法) (torchrl.envs.ParallelEnv 方法) (torchrl.envs.PendulumEnv 方法) (torchrl.envs.ProcessorAsyncEnvPool 方法) (torchrl.envs.SerialEnv 方法) (torchrl.envs.ThreadingAsyncEnvPool 方法) (torchrl.envs.TicTacToeEnv 方法) (torchrl.envs.transforms.rb_transforms.MultiStepTransform 方法) (torchrl.modules.llm.LLMWrapperBase 方法) (torchrl.modules.llm.TransformersWrapper 方法) (torchrl.modules.llm.vLLMWrapper 方法) (torchrl.objectives.llm.GRPOLoss 方法) (torchrl.objectives.llm.MCAdvantage 方法) (torchrl.objectives.llm.SFTLoss 方法) (torchrl.objectives.LossModule 方法) ndim (torchrl.data.Binary 屬性) (torchrl.data.BinaryDiscreteTensorSpec 屬性) (torchrl.data.Bounded 屬性) (torchrl.data.BoundedTensorSpec 屬性) (torchrl.data.Categorical 屬性) (torchrl.data.Composite 屬性) (torchrl.data.CompositeSpec 屬性) (torchrl.data.DiscreteTensorSpec 屬性) (torchrl.data.LazyStackedCompositeSpec 屬性) (torchrl.data.LazyStackedTensorSpec 屬性) (torchrl.data.MultiCategorical 屬性) (torchrl.data.MultiDiscreteTensorSpec 屬性) (torchrl.data.MultiOneHot 屬性) (torchrl.data.MultiOneHotDiscreteTensorSpec 屬性) (torchrl.data.NonTensor 屬性) (torchrl.data.NonTensorSpec 屬性) (torchrl.data.OneHot 屬性) (torchrl.data.OneHotDiscreteTensorSpec 屬性) (torchrl.data.Stacked 屬性) (torchrl.data.StackedComposite 屬性) (torchrl.data.TensorSpec 屬性) (torchrl.data.Unbounded 屬性) (torchrl.data.UnboundedContinuous 屬性) (torchrl.data.UnboundedContinuousTensorSpec 屬性) (torchrl.data.UnboundedDiscrete 屬性) (torchrl.data.UnboundedDiscreteTensorSpec 屬性) ndimension() (torchrl.data.Binary 方法) (torchrl.data.BinaryDiscreteTensorSpec 方法) (torchrl.data.Bounded 方法) (torchrl.data.BoundedTensorSpec 方法) (torchrl.data.Categorical 方法) (torchrl.data.Composite 方法) (torchrl.data.CompositeSpec 方法) (torchrl.data.DiscreteTensorSpec 方法) (torchrl.data.LazyStackedCompositeSpec 方法) (torchrl.data.LazyStackedTensorSpec 方法) (torchrl.data.MultiCategorical 方法) (torchrl.data.MultiDiscreteTensorSpec 方法) (torchrl.data.MultiOneHot 方法) (torchrl.data.MultiOneHotDiscreteTensorSpec 方法) (torchrl.data.NonTensor 方法) (torchrl.data.NonTensorSpec 方法) (torchrl.data.OneHot 方法) (torchrl.data.OneHotDiscreteTensorSpec 方法) (torchrl.data.Stacked 方法) (torchrl.data.StackedComposite 方法) (torchrl.data.TensorSpec 方法) (torchrl.data.Unbounded 方法) (torchrl.data.UnboundedContinuous 方法) (torchrl.data.UnboundedContinuousTensorSpec 方法) (torchrl.data.UnboundedDiscrete 方法) (torchrl.data.UnboundedDiscreteTensorSpec 方法) Nested2TED (torchrl.data 中的類) NestedStorageCheckpointer (torchrl.data.replay_buffers 中的類) NetworkConfig (torchrl.trainers.algorithms.configs.modules 中的類) next() (torchrl.collectors.llm.RayLLMCollector 方法) (torchrl.data.datasets.AtariDQNExperienceReplay 方法) (torchrl.data.datasets.BaseDatasetExperienceReplay 方法) (torchrl.data.datasets.D4RLExperienceReplay 方法) (torchrl.data.datasets.GenDGRLExperienceReplay 方法) (torchrl.data.datasets.MinariExperienceReplay 方法) (torchrl.data.datasets.OpenMLExperienceReplay 方法) (torchrl.data.datasets.OpenXExperienceReplay 方法) (torchrl.data.datasets.RobosetExperienceReplay 方法) (torchrl.data.datasets.VD4RLExperienceReplay 方法) (torchrl.data.PrioritizedReplayBuffer 方法) (torchrl.data.RayReplayBuffer 方法) (torchrl.data.RemoteTensorDictReplayBuffer 方法) (torchrl.data.replay_buffers.ReplayBufferEnsemble 方法) (torchrl.data.ReplayBuffer 方法) (torchrl.data.TensorDictPrioritizedReplayBuffer 方法) (torchrl.data.TensorDictReplayBuffer 方法) next_state_value (torchrl.objectives 中的類) node_observation (torchrl.data.Tree 屬性) node_observations (torchrl.data.Tree 屬性) NoisyLazyLinear (torchrl.modules 中的類) NoisyLinear (torchrl.modules 中的類) NonTensor (torchrl.data 中的類) NonTensorSpec (torchrl.data 中的類) NoopResetEnv (torchrl.envs.transforms 中的類) NoopResetEnvConfig (torchrl.trainers.algorithms.configs.transforms 中的類) num_children (torchrl.data.Tree 屬性) num_samples (torchrl.modules.llm.RemoteTransformersWrapper 屬性) num_vertices() (torchrl.data.Tree 方法) O ObsDecoder (torchrl.modules 中的類) ObsEncoder (torchrl.modules 中的類) observation_keys (torchrl.data.MCTSForest 屬性) (torchrl.envs.AsyncEnvPool 屬性) (torchrl.envs.ChessEnv 屬性) (torchrl.envs.EnvBase 屬性) (torchrl.envs.GymLikeEnv 屬性) (torchrl.envs.llm.ChatEnv 屬性) (torchrl.envs.llm.DatasetChatEnv 屬性) (torchrl.envs.llm.GSM8KEnv 屬性) (torchrl.envs.llm.IFEvalEnv 屬性) (torchrl.envs.llm.LLMEnv 屬性) (torchrl.envs.llm.LLMHashingEnv 屬性) (torchrl.envs.llm.MLGymWrapper 屬性) (torchrl.envs.LLMHashingEnv 屬性) (torchrl.envs.ParallelEnv 屬性) (torchrl.envs.PendulumEnv 屬性) (torchrl.envs.ProcessorAsyncEnvPool 屬性) (torchrl.envs.SerialEnv 屬性) (torchrl.envs.ThreadingAsyncEnvPool 屬性) (torchrl.envs.TicTacToeEnv 屬性) observation_spec (torchrl.envs.AsyncEnvPool 屬性) (torchrl.envs.ChessEnv 屬性) (torchrl.envs.EnvBase 屬性) (torchrl.envs.GymLikeEnv 屬性) (torchrl.envs.llm.ChatEnv 屬性) (torchrl.envs.llm.DatasetChatEnv 屬性) (torchrl.envs.llm.GSM8KEnv 屬性) (torchrl.envs.llm.IFEvalEnv 屬性) (torchrl.envs.llm.LLMEnv 屬性) (torchrl.envs.llm.LLMHashingEnv 屬性) (torchrl.envs.llm.MLGymWrapper 屬性) (torchrl.envs.LLMHashingEnv 屬性) (torchrl.envs.ParallelEnv 屬性) (torchrl.envs.PendulumEnv 屬性) (torchrl.envs.ProcessorAsyncEnvPool 屬性) (torchrl.envs.SerialEnv 屬性) (torchrl.envs.ThreadingAsyncEnvPool 屬性) (torchrl.envs.TicTacToeEnv 屬性) observation_spec_unbatched (torchrl.envs.AsyncEnvPool 屬性) (torchrl.envs.ChessEnv 屬性) (torchrl.envs.EnvBase 屬性) (torchrl.envs.GymLikeEnv 屬性) (torchrl.envs.llm.ChatEnv 屬性) (torchrl.envs.llm.DatasetChatEnv 屬性) (torchrl.envs.llm.GSM8KEnv 屬性) (torchrl.envs.llm.IFEvalEnv 屬性) (torchrl.envs.llm.LLMEnv 屬性) (torchrl.envs.llm.LLMHashingEnv 屬性) (torchrl.envs.llm.MLGymWrapper 屬性) (torchrl.envs.LLMHashingEnv 屬性) (torchrl.envs.ParallelEnv 屬性) (torchrl.envs.PendulumEnv 屬性) (torchrl.envs.ProcessorAsyncEnvPool 屬性) (torchrl.envs.SerialEnv 屬性) (torchrl.envs.ThreadingAsyncEnvPool 屬性) (torchrl.envs.TicTacToeEnv 屬性) ObservationNorm (torchrl.envs.transforms 中的類) ObservationNormConfig (torchrl.trainers.algorithms.configs.transforms 中的類) ObservationTransform (torchrl.envs.transforms 中的類) one() (torchrl.data.Binary 方法) (torchrl.data.BinaryDiscreteTensorSpec 方法) (torchrl.data.Bounded 方法) (torchrl.data.BoundedTensorSpec 方法) (torchrl.data.Categorical 方法) (torchrl.data.Composite 方法) (torchrl.data.CompositeSpec 方法) (torchrl.data.DiscreteTensorSpec 方法) (torchrl.data.LazyStackedCompositeSpec 方法) (torchrl.data.LazyStackedTensorSpec 方法) (torchrl.data.MultiCategorical 方法) (torchrl.data.MultiDiscreteTensorSpec 方法) (torchrl.data.MultiOneHot 方法) (torchrl.data.MultiOneHotDiscreteTensorSpec 方法) (torchrl.data.NonTensor 方法) (torchrl.data.NonTensorSpec 方法) (torchrl.data.OneHot 方法) (torchrl.data.OneHotDiscreteTensorSpec 方法) (torchrl.data.Stacked 方法) (torchrl.data.StackedComposite 方法) (torchrl.data.TensorSpec 方法) (torchrl.data.Unbounded 方法) (torchrl.data.UnboundedContinuous 方法) (torchrl.data.UnboundedContinuousTensorSpec 方法) (torchrl.data.UnboundedDiscrete 方法) (torchrl.data.UnboundedDiscreteTensorSpec 方法) OneHot (torchrl.data 中的類) OneHotCategorical (torchrl.modules 中的類) OneHotDiscreteTensorSpec (torchrl.data 中的類) OneHotOrdinal (torchrl.modules 中的類) ones() (torchrl.data.Binary 方法) (torchrl.data.BinaryDiscreteTensorSpec 方法) (torchrl.data.Bounded 方法) (torchrl.data.BoundedTensorSpec 方法) (torchrl.data.Categorical 方法) (torchrl.data.Composite 方法) (torchrl.data.CompositeSpec 方法) (torchrl.data.DiscreteTensorSpec 方法) (torchrl.data.LazyStackedCompositeSpec 方法) (torchrl.data.LazyStackedTensorSpec 方法) (torchrl.data.MultiCategorical 方法) (torchrl.data.MultiDiscreteTensorSpec 方法) (torchrl.data.MultiOneHot 方法) (torchrl.data.MultiOneHotDiscreteTensorSpec 方法) (torchrl.data.NonTensor 方法) (torchrl.data.NonTensorSpec 方法) (torchrl.data.OneHot 方法) (torchrl.data.OneHotDiscreteTensorSpec 方法) (torchrl.data.Stacked 方法) (torchrl.data.StackedComposite 方法) (torchrl.data.TensorSpec 方法) (torchrl.data.Unbounded 方法) (torchrl.data.UnboundedContinuous 方法) (torchrl.data.UnboundedContinuousTensorSpec 方法) (torchrl.data.UnboundedDiscrete 方法) (torchrl.data.UnboundedDiscreteTensorSpec 方法) OnlineDTActor (torchrl.modules 中的類) OnlineDTLoss (torchrl.objectives 中的類) OpenMLEnv() (在 torchrl.envs 模組中) OpenMLEnvConfig (torchrl.trainers.algorithms.configs.envs_libs 中的類) OpenMLExperienceReplay (torchrl.data.datasets 中的類) OpenSpielEnv() (在 torchrl.envs 模組中) OpenSpielEnvConfig (torchrl.trainers.algorithms.configs.envs_libs 中的類) OpenSpielWrapper() (在 torchrl.envs 模組中) OpenXExperienceReplay (torchrl.data.datasets 中的類) OptimizerHook (torchrl.trainers 中的類) Ordinal (torchrl.modules 中的類) OrnsteinUhlenbeckProcessModule (torchrl.modules 中的類) out_keys (torchrl.envs.llm.transforms.RayDataLoadingPrimer 屬性) (torchrl.modules.llm.RemoteTransformersWrapper 屬性) out_keys_inv (torchrl.envs.llm.transforms.RayDataLoadingPrimer 屬性) output_spec (torchrl.envs.AsyncEnvPool 屬性) (torchrl.envs.ChessEnv 屬性) (torchrl.envs.EnvBase 屬性) (torchrl.envs.GymLikeEnv 屬性) (torchrl.envs.llm.ChatEnv 屬性) (torchrl.envs.llm.DatasetChatEnv 屬性) (torchrl.envs.llm.GSM8KEnv 屬性) (torchrl.envs.llm.IFEvalEnv 屬性) (torchrl.envs.llm.LLMEnv 屬性) (torchrl.envs.llm.LLMHashingEnv 屬性) (torchrl.envs.llm.MLGymWrapper 屬性) (torchrl.envs.LLMHashingEnv 屬性) (torchrl.envs.ParallelEnv 屬性) (torchrl.envs.PendulumEnv 屬性) (torchrl.envs.ProcessorAsyncEnvPool 屬性) (torchrl.envs.SerialEnv 屬性) (torchrl.envs.ThreadingAsyncEnvPool 屬性) (torchrl.envs.TicTacToeEnv 屬性) (torchrl.envs.transforms.TransformedEnv 屬性) output_spec_unbatched (torchrl.envs.AsyncEnvPool 屬性) (torchrl.envs.ChessEnv 屬性) (torchrl.envs.EnvBase 屬性) (torchrl.envs.GymLikeEnv 屬性) (torchrl.envs.llm.ChatEnv 屬性) (torchrl.envs.llm.DatasetChatEnv 屬性) (torchrl.envs.llm.GSM8KEnv 屬性) (torchrl.envs.llm.IFEvalEnv 屬性) (torchrl.envs.llm.LLMEnv 屬性) (torchrl.envs.llm.LLMHashingEnv 屬性) (torchrl.envs.llm.MLGymWrapper 屬性) (torchrl.envs.LLMHashingEnv 屬性) (torchrl.envs.ParallelEnv 屬性) (torchrl.envs.PendulumEnv 屬性) (torchrl.envs.ProcessorAsyncEnvPool 屬性) (torchrl.envs.SerialEnv 屬性) (torchrl.envs.ThreadingAsyncEnvPool 屬性) (torchrl.envs.TicTacToeEnv 屬性) P pad_output (torchrl.modules.llm.RemoteTransformersWrapper 屬性) padding_value (torchrl.modules.MaskedCategorical 屬性) PairwiseDataset (torchrl.data 中的類) parallel_env_constructor() (在 torchrl.trainers.helpers 模組中) ParallelEnv (torchrl.envs 中的類) parameters() (torchrl.data.BinaryToDecimal 方法) (torchrl.data.DensifyReward 方法) (torchrl.data.HashToInt 方法) (torchrl.data.llm.TopKRewardSelector 方法) (torchrl.data.MultiStep 方法) (torchrl.data.QueryModule 方法) (torchrl.data.RandomProjectionHash 方法) (torchrl.data.SipHash 方法) (torchrl.data.TensorDictMap 方法) (torchrl.envs.AsyncEnvPool 方法) (torchrl.envs.ChessEnv 方法) (torchrl.envs.EnvBase 方法) (torchrl.envs.GymLikeEnv 方法) (torchrl.envs.llm.ChatEnv 方法) (torchrl.envs.llm.DatasetChatEnv 方法) (torchrl.envs.llm.GSM8KEnv 方法) (torchrl.envs.llm.GSM8KPrepareQuestion 方法) (torchrl.envs.llm.GSM8KRewardParser 方法) (torchrl.envs.llm.IFEvalEnv 方法) (torchrl.envs.llm.IfEvalScorer 方法) (torchrl.envs.llm.LLMEnv 方法) (torchrl.envs.llm.LLMHashingEnv 方法) (torchrl.envs.llm.MLGymWrapper 方法) (torchrl.envs.llm.transforms.AddThinkingPrompt 方法) (torchrl.envs.llm.transforms.BrowserTransform 方法) (torchrl.envs.llm.transforms.DataLoadingPrimer 方法) (torchrl.envs.llm.transforms.KLComputation 方法) (torchrl.envs.llm.transforms.KLRewardTransform 方法) (torchrl.envs.llm.transforms.MCPToolTransform 方法) (torchrl.envs.llm.transforms.PolicyVersion 方法) (torchrl.envs.llm.transforms.PythonInterpreter 方法) (torchrl.envs.llm.transforms.RayDataLoadingPrimer 方法) (torchrl.envs.llm.transforms.RetrieveKL 方法) (torchrl.envs.llm.transforms.RetrieveLogProb 方法) (torchrl.envs.llm.transforms.TemplateTransform 方法) (torchrl.envs.llm.transforms.Tokenizer 方法) (torchrl.envs.LLMHashingEnv 方法) (torchrl.envs.ParallelEnv 方法) (torchrl.envs.PendulumEnv 方法) (torchrl.envs.ProcessorAsyncEnvPool 方法) (torchrl.envs.SerialEnv 方法) (torchrl.envs.ThreadingAsyncEnvPool 方法) (torchrl.envs.TicTacToeEnv 方法) (torchrl.envs.transforms.rb_transforms.MultiStepTransform 方法) (torchrl.modules.llm.LLMWrapperBase 方法) (torchrl.modules.llm.TransformersWrapper 方法) (torchrl.modules.llm.vLLMWrapper 方法) (torchrl.objectives.llm.GRPOLoss 方法) (torchrl.objectives.llm.MCAdvantage 方法) (torchrl.objectives.llm.SFTLoss 方法) (torchrl.objectives.LossModule 方法) parent (torchrl.data.llm.TopKRewardSelector 屬性) (torchrl.data.Tree 屬性) (torchrl.envs.llm.GSM8KPrepareQuestion 屬性) (torchrl.envs.llm.GSM8KRewardParser 屬性) (torchrl.envs.llm.IfEvalScorer 屬性) (torchrl.envs.llm.transforms.AddThinkingPrompt 屬性) (torchrl.envs.llm.transforms.BrowserTransform 屬性) (torchrl.envs.llm.transforms.DataLoadingPrimer 屬性) (torchrl.envs.llm.transforms.KLComputation 屬性) (torchrl.envs.llm.transforms.KLRewardTransform 屬性) (torchrl.envs.llm.transforms.MCPToolTransform 屬性) (torchrl.envs.llm.transforms.PolicyVersion 屬性) (torchrl.envs.llm.transforms.PythonInterpreter 屬性) (torchrl.envs.llm.transforms.RayDataLoadingPrimer 屬性) (torchrl.envs.llm.transforms.RetrieveKL 屬性) (torchrl.envs.llm.transforms.RetrieveLogProb 屬性) (torchrl.envs.llm.transforms.TemplateTransform 屬性) (torchrl.envs.llm.transforms.Tokenizer 屬性) (torchrl.envs.transforms.rb_transforms.MultiStepTransform 屬性) (torchrl.envs.transforms.Transform 屬性) (torchrl.objectives.llm.MCAdvantage 屬性) pause() (torchrl.collectors.aSyncDataCollector 方法) (torchrl.collectors.DataCollectorBase 方法) (torchrl.collectors.distributed.DistributedDataCollector 方法) (torchrl.collectors.distributed.DistributedSyncDataCollector 方法) (torchrl.collectors.distributed.RayCollector 方法) (torchrl.collectors.distributed.RPCDataCollector 方法) (torchrl.collectors.llm.LLMCollector 方法) (torchrl.collectors.llm.RayLLMCollector 方法) (torchrl.collectors.MultiaSyncDataCollector 方法) (torchrl.collectors.MultiSyncDataCollector 方法) (torchrl.collectors.SyncDataCollector 方法) PendulumEnv (torchrl.envs 中的類) PermuteTransform (torchrl.envs.transforms 中的類) PermuteTransformConfig (torchrl.trainers.algorithms.configs.transforms 中的類) PettingZooEnv() (在 torchrl.envs 模組中) PettingZooEnvConfig (torchrl.trainers.algorithms.configs.envs_libs 中的類) PettingZooWrapper() (在 torchrl.envs 模組中) PinMemoryTransform (torchrl.envs.transforms 中的類) PinMemoryTransformConfig (torchrl.trainers.algorithms.configs.transforms 中的類) PixelRenderTransform() (在 torchrl.record 模組中) planning() (torchrl.modules.CEMPlanner 方法) (torchrl.modules.MPCPlannerBase 方法) (torchrl.modules.MPPIPlanner 方法) plot() (torchrl.data.Tree 方法) policy_version (torchrl.collectors.llm.LLMCollector 屬性) (torchrl.collectors.llm.RayLLMCollector 屬性) PolicyVersion (torchrl.envs.llm.transforms 中的類) pop() (torchrl.data.Composite 方法) (torchrl.data.CompositeSpec 方法) (torchrl.data.LazyStackedCompositeSpec 方法) (torchrl.data.StackedComposite 方法) (torchrl.envs.llm.transforms.RetrieveKL 方法) (torchrl.envs.transforms.Compose 方法) position_level_masking (torchrl.modules.LLMMaskedCategorical 屬性) post_hooks (torchrl.collectors.distributed.DistributedWeightUpdater 屬性) (torchrl.collectors.distributed.RPCWeightUpdater 屬性) (torchrl.collectors.llm.vLLMUpdater 屬性) (torchrl.collectors.llm.vLLMUpdaterV2 屬性) (torchrl.collectors.MultiProcessedWeightUpdater 屬性) (torchrl.collectors.RayWeightUpdater 屬性) (torchrl.collectors.VanillaWeightUpdater 屬性) (torchrl.collectors.WeightUpdaterBase 屬性) PPOLoss (torchrl.objectives 中的類) PPOLossConfig (torchrl.trainers.algorithms.configs.objectives 中的類) PPOTrainer (torchrl.trainers.algorithms 中的類) PPOTrainerConfig (torchrl.trainers.algorithms.configs.trainers 中的類) preprocess() (torchrl.data.datasets.AtariDQNExperienceReplay 方法) (torchrl.data.datasets.BaseDatasetExperienceReplay 方法) (torchrl.data.datasets.D4RLExperienceReplay 方法) (torchrl.data.datasets.GenDGRLExperienceReplay 方法) (torchrl.data.datasets.MinariExperienceReplay 方法) (torchrl.data.datasets.OpenMLExperienceReplay 方法) (torchrl.data.datasets.OpenXExperienceReplay 方法) (torchrl.data.datasets.RobosetExperienceReplay 方法) (torchrl.data.datasets.VD4RLExperienceReplay 方法) prev_action (torchrl.data.Tree 屬性) primers (torchrl.envs.llm.transforms.RayDataLoadingPrimer 屬性) PrioritizedReplayBuffer (torchrl.data 中的類) PrioritizedSampler (torchrl.data.replay_buffers 中的類) PrioritizedSamplerConfig (torchrl.trainers.algorithms.configs.data 中的類) PrioritizedSliceSampler (torchrl.data.replay_buffers 中的類) ProbabilisticActor (torchrl.modules.tensordict_module 中的類) probs (torchrl.modules.LLMMaskedCategorical 屬性) ProcessorAsyncEnvPool (torchrl.envs 中的類) project() (torchrl.data.Binary 方法) (torchrl.data.BinaryDiscreteTensorSpec 方法) (torchrl.data.Bounded 方法) (torchrl.data.BoundedTensorSpec 方法) (torchrl.data.Categorical 方法) (torchrl.data.Composite 方法) (torchrl.data.CompositeSpec 方法) (torchrl.data.DiscreteTensorSpec 方法) (torchrl.data.LazyStackedCompositeSpec 方法) (torchrl.data.LazyStackedTensorSpec 方法) (torchrl.data.MultiCategorical 方法) (torchrl.data.MultiDiscreteTensorSpec 方法) (torchrl.data.MultiOneHot 方法) (torchrl.data.MultiOneHotDiscreteTensorSpec 方法) (torchrl.data.NonTensor 方法) (torchrl.data.NonTensorSpec 方法) (torchrl.data.OneHot 方法) (torchrl.data.OneHotDiscreteTensorSpec 方法) (torchrl.data.Stacked 方法) (torchrl.data.StackedComposite 方法) (torchrl.data.TensorSpec 方法) (torchrl.data.Unbounded 方法) (torchrl.data.UnboundedContinuous 方法) (torchrl.data.UnboundedContinuousTensorSpec 方法) (torchrl.data.UnboundedDiscrete 方法) (torchrl.data.UnboundedDiscreteTensorSpec 方法) PromptData (torchrl.data 中的類) PromptTensorDictTokenizer (torchrl.data 中的類) push_weights() (torchrl.collectors.distributed.DistributedWeightUpdater 方法) (torchrl.collectors.distributed.RPCWeightUpdater 方法) (torchrl.collectors.llm.vLLMUpdater 方法) (torchrl.collectors.llm.vLLMUpdaterV2 方法) (torchrl.collectors.MultiProcessedWeightUpdater 方法) (torchrl.collectors.RayWeightUpdater 方法) (torchrl.collectors.VanillaWeightUpdater 方法) (torchrl.collectors.WeightUpdaterBase 方法), [1] push_weights_from_transformers() (torchrl.collectors.llm.vLLMUpdaterV2 方法) push_weights_from_transformers_optimized() (torchrl.collectors.llm.vLLMUpdaterV2 方法) PythonInterpreter (torchrl.envs.llm.transforms 中的類) Q QMixer (torchrl.modules 中的類) QMixerLoss (torchrl.objectives.multiagent 中的類) QueryModule (torchrl.data 中的類) qvalue_loss() (torchrl.objectives.CrossQLoss 方法) (torchrl.objectives.DiscreteSACLoss 方法) (torchrl.objectives.SACLoss 方法) (torchrl.objectives.TD3BCLoss 方法) QValueActor (torchrl.modules.tensordict_module 中的類) QValueHook (torchrl.modules 中的類) QValueModule (類,位於 torchrl.modules.tensordict_module) R R3MTransform (類,位於 torchrl.envs.transforms) R3MTransformConfig (類,位於 torchrl.trainers.algorithms.configs.transforms) RAdamConfig (類,位於 torchrl.trainers.algorithms.configs.utils) rand() (torchrl.data.Binary 方法) (torchrl.data.BinaryDiscreteTensorSpec 方法) (torchrl.data.Bounded 方法) (torchrl.data.BoundedTensorSpec 方法) (torchrl.data.Categorical 方法) (torchrl.data.Composite 方法) (torchrl.data.CompositeSpec 方法) (torchrl.data.DiscreteTensorSpec 方法) (torchrl.data.LazyStackedCompositeSpec 方法) (torchrl.data.LazyStackedTensorSpec 方法) (torchrl.data.MultiCategorical 方法) (torchrl.data.MultiDiscreteTensorSpec 方法) (torchrl.data.MultiOneHot 方法) (torchrl.data.MultiOneHotDiscreteTensorSpec 方法) (torchrl.data.NonTensor 方法) (torchrl.data.NonTensorSpec 方法) (torchrl.data.OneHot 方法) (torchrl.data.OneHotDiscreteTensorSpec 方法) (torchrl.data.Stacked 方法) (torchrl.data.StackedComposite 方法) (torchrl.data.TensorSpec 方法) (torchrl.data.Unbounded 方法) (torchrl.data.UnboundedContinuous 方法) (torchrl.data.UnboundedContinuousTensorSpec 方法) (torchrl.data.UnboundedDiscrete 方法) (torchrl.data.UnboundedDiscreteTensorSpec 方法) rand_action() (torchrl.envs.AsyncEnvPool 方法) (torchrl.envs.ChessEnv 方法) (torchrl.envs.EnvBase 方法) (torchrl.envs.GymLikeEnv 方法) (torchrl.envs.llm.ChatEnv 方法) (torchrl.envs.llm.DatasetChatEnv 方法) (torchrl.envs.llm.GSM8KEnv 方法) (torchrl.envs.llm.IFEvalEnv 方法) (torchrl.envs.llm.LLMEnv 方法) (torchrl.envs.llm.LLMHashingEnv 方法) (torchrl.envs.llm.MLGymWrapper 方法) (torchrl.envs.LLMHashingEnv 方法) (torchrl.envs.ParallelEnv 方法) (torchrl.envs.PendulumEnv 方法) (torchrl.envs.ProcessorAsyncEnvPool 方法) (torchrl.envs.SerialEnv 方法) (torchrl.envs.ThreadingAsyncEnvPool 方法) (torchrl.envs.TicTacToeEnv 方法) (torchrl.envs.transforms.TransformedEnv 方法) rand_step() (位於模組 torchrl.envs) (torchrl.envs.AsyncEnvPool 方法) (torchrl.envs.ChessEnv 方法) (torchrl.envs.EnvBase 方法), [1] (torchrl.envs.GymLikeEnv 方法) (torchrl.envs.llm.ChatEnv 方法) (torchrl.envs.llm.DatasetChatEnv 方法) (torchrl.envs.llm.GSM8KEnv 方法) (torchrl.envs.llm.IFEvalEnv 方法) (torchrl.envs.llm.LLMEnv 方法) (torchrl.envs.llm.LLMHashingEnv 方法) (torchrl.envs.llm.MLGymWrapper 方法) (torchrl.envs.LLMHashingEnv 方法) (torchrl.envs.ParallelEnv 方法) (torchrl.envs.PendulumEnv 方法) (torchrl.envs.ProcessorAsyncEnvPool 方法) (torchrl.envs.SerialEnv 方法) (torchrl.envs.ThreadingAsyncEnvPool 方法) (torchrl.envs.TicTacToeEnv 方法) random() (torchrl.modules.tensordict_module.SafeModule 方法) (torchrl.modules.tensordict_module.SafeProbabilisticModule 方法) random_sample() (torchrl.modules.tensordict_module.SafeModule 方法) (torchrl.modules.tensordict_module.SafeProbabilisticModule 方法) RandomCropTensorDict (類,位於 torchrl.envs.transforms) RandomCropTensorDictConfig (類,位於 torchrl.trainers.algorithms.configs.transforms) RandomPolicy() (位於模組 torchrl.envs) RandomProjectionHash (類,位於 torchrl.data) RandomSampler (類,位於 torchrl.data.replay_buffers) RandomSamplerConfig (類,位於 torchrl.trainers.algorithms.configs.data) RayCollector (類,位於 torchrl.collectors.distributed) RayDataLoadingPrimer (類,位於 torchrl.envs.llm.transforms) RayLLMCollector (類,位於 torchrl.collectors.llm) RayReplayBuffer (類,位於 torchrl.data) RayWeightUpdater (類,位於 torchrl.collectors) read_action() (torchrl.envs.GymLikeEnv 方法) (torchrl.envs.llm.MLGymWrapper 方法) read_done() (torchrl.envs.GymLikeEnv 方法) (torchrl.envs.llm.MLGymWrapper 方法) read_obs() (torchrl.envs.GymLikeEnv 方法) (torchrl.envs.llm.MLGymWrapper 方法) read_reward() (torchrl.envs.GymLikeEnv 方法) (torchrl.envs.llm.MLGymWrapper 方法) recurrent_mode (類,位於 torchrl.modules) REDQLoss (類,位於 torchrl.objectives) register() (torchrl.trainers.BatchSubSampler 方法) (torchrl.trainers.ClearCudaCache 方法) (torchrl.trainers.CountFramesLog 方法) (torchrl.trainers.LogScalar 方法) (torchrl.trainers.LogValidationReward 方法) (torchrl.trainers.OptimizerHook 方法) (torchrl.trainers.ReplayBufferTrainer 方法) (torchrl.trainers.RewardNormalizer 方法) (torchrl.trainers.SelectKeys 方法) (torchrl.trainers.TrainerHookBase 方法) (torchrl.trainers.UpdateWeights 方法) register_backward_hook() (torchrl.data.BinaryToDecimal 方法) (torchrl.data.DensifyReward 方法) (torchrl.data.HashToInt 方法) (torchrl.data.llm.TopKRewardSelector 方法) (torchrl.data.MultiStep 方法) (torchrl.data.QueryModule 方法) (torchrl.data.RandomProjectionHash 方法) (torchrl.data.SipHash 方法) (torchrl.data.TensorDictMap 方法) (torchrl.envs.AsyncEnvPool 方法) (torchrl.envs.ChessEnv 方法) (torchrl.envs.EnvBase 方法) (torchrl.envs.GymLikeEnv 方法) (torchrl.envs.llm.ChatEnv 方法) (torchrl.envs.llm.DatasetChatEnv 方法) (torchrl.envs.llm.GSM8KEnv 方法) (torchrl.envs.llm.GSM8KPrepareQuestion 方法) (torchrl.envs.llm.GSM8KRewardParser 方法) (torchrl.envs.llm.IFEvalEnv 方法) (torchrl.envs.llm.IfEvalScorer 方法) (torchrl.envs.llm.LLMEnv 方法) (torchrl.envs.llm.LLMHashingEnv 方法) (torchrl.envs.llm.MLGymWrapper 方法) (torchrl.envs.llm.transforms.AddThinkingPrompt 方法) (torchrl.envs.llm.transforms.BrowserTransform 方法) (torchrl.envs.llm.transforms.DataLoadingPrimer 方法) (torchrl.envs.llm.transforms.KLComputation 方法) (torchrl.envs.llm.transforms.KLRewardTransform 方法) (torchrl.envs.llm.transforms.MCPToolTransform 方法) (torchrl.envs.llm.transforms.PolicyVersion 方法) (torchrl.envs.llm.transforms.PythonInterpreter 方法) (torchrl.envs.llm.transforms.RayDataLoadingPrimer 方法) (torchrl.envs.llm.transforms.RetrieveKL 方法) (torchrl.envs.llm.transforms.RetrieveLogProb 方法) (torchrl.envs.llm.transforms.TemplateTransform 方法) (torchrl.envs.llm.transforms.Tokenizer 方法) (torchrl.envs.LLMHashingEnv 方法) (torchrl.envs.ParallelEnv 方法) (torchrl.envs.PendulumEnv 方法) (torchrl.envs.ProcessorAsyncEnvPool 方法) (torchrl.envs.SerialEnv 方法) (torchrl.envs.ThreadingAsyncEnvPool 方法) (torchrl.envs.TicTacToeEnv 方法) (torchrl.envs.transforms.rb_transforms.MultiStepTransform 方法) (torchrl.modules.llm.LLMWrapperBase 方法) (torchrl.modules.llm.TransformersWrapper 方法) (torchrl.modules.llm.vLLMWrapper 方法) (torchrl.objectives.llm.GRPOLoss 方法) (torchrl.objectives.llm.MCAdvantage 方法) (torchrl.objectives.llm.SFTLoss 方法) register_buffer() (torchrl.data.BinaryToDecimal 方法) (torchrl.data.DensifyReward 方法) (torchrl.data.HashToInt 方法) (torchrl.data.llm.TopKRewardSelector 方法) (torchrl.data.MultiStep 方法) (torchrl.data.QueryModule 方法) (torchrl.data.RandomProjectionHash 方法) (torchrl.data.SipHash 方法) (torchrl.data.TensorDictMap 方法) (torchrl.envs.AsyncEnvPool 方法) (torchrl.envs.ChessEnv 方法) (torchrl.envs.EnvBase 方法) (torchrl.envs.GymLikeEnv 方法) (torchrl.envs.llm.ChatEnv 方法) (torchrl.envs.llm.DatasetChatEnv 方法) (torchrl.envs.llm.GSM8KEnv 方法) (torchrl.envs.llm.GSM8KPrepareQuestion 方法) (torchrl.envs.llm.GSM8KRewardParser 方法) (torchrl.envs.llm.IFEvalEnv 方法) (torchrl.envs.llm.IfEvalScorer 方法) (torchrl.envs.llm.LLMEnv 方法) (torchrl.envs.llm.LLMHashingEnv 方法) (torchrl.envs.llm.MLGymWrapper 方法) (torchrl.envs.llm.transforms.AddThinkingPrompt 方法) (torchrl.envs.llm.transforms.BrowserTransform 方法) (torchrl.envs.llm.transforms.DataLoadingPrimer 方法) (torchrl.envs.llm.transforms.KLComputation 方法) (torchrl.envs.llm.transforms.KLRewardTransform 方法) (torchrl.envs.llm.transforms.MCPToolTransform 方法) (torchrl.envs.llm.transforms.PolicyVersion 方法) (torchrl.envs.llm.transforms.PythonInterpreter 方法) (torchrl.envs.llm.transforms.RayDataLoadingPrimer 方法) (torchrl.envs.llm.transforms.RetrieveKL 方法) (torchrl.envs.llm.transforms.RetrieveLogProb 方法) (torchrl.envs.llm.transforms.TemplateTransform 方法) (torchrl.envs.llm.transforms.Tokenizer 方法) (torchrl.envs.LLMHashingEnv 方法) (torchrl.envs.ParallelEnv 方法) (torchrl.envs.PendulumEnv 方法) (torchrl.envs.ProcessorAsyncEnvPool 方法) (torchrl.envs.SerialEnv 方法) (torchrl.envs.ThreadingAsyncEnvPool 方法) (torchrl.envs.TicTacToeEnv 方法) (torchrl.envs.transforms.rb_transforms.MultiStepTransform 方法) (torchrl.modules.llm.LLMWrapperBase 方法) (torchrl.modules.llm.TransformersWrapper 方法) (torchrl.modules.llm.vLLMWrapper 方法) (torchrl.objectives.llm.GRPOLoss 方法) (torchrl.objectives.llm.MCAdvantage 方法) (torchrl.objectives.llm.SFTLoss 方法) register_collector() (torchrl.collectors.distributed.DistributedWeightUpdater 方法) (torchrl.collectors.distributed.RPCWeightUpdater 方法) (torchrl.collectors.llm.vLLMUpdater 方法) (torchrl.collectors.llm.vLLMUpdaterV2 方法) (torchrl.collectors.MultiProcessedWeightUpdater 方法) (torchrl.collectors.RayWeightUpdater 方法) (torchrl.collectors.VanillaWeightUpdater 方法) (torchrl.collectors.WeightUpdaterBase 方法), [1] (torchrl.envs.AsyncEnvPool 方法) (torchrl.envs.ChessEnv 方法) (torchrl.envs.EnvBase 方法) (torchrl.envs.GymLikeEnv 方法) (torchrl.envs.llm.ChatEnv 方法) (torchrl.envs.llm.DatasetChatEnv 方法) (torchrl.envs.llm.GSM8KEnv 方法) (torchrl.envs.llm.IFEvalEnv 方法) (torchrl.envs.llm.LLMEnv 方法) (torchrl.envs.llm.LLMHashingEnv 方法) (torchrl.envs.llm.MLGymWrapper 方法) (torchrl.envs.LLMHashingEnv 方法) (torchrl.envs.ParallelEnv 方法) (torchrl.envs.PendulumEnv 方法) (torchrl.envs.ProcessorAsyncEnvPool 方法) (torchrl.envs.SerialEnv 方法) (torchrl.envs.ThreadingAsyncEnvPool 方法) (torchrl.envs.TicTacToeEnv 方法) (torchrl.modules.llm.LLMWrapperBase 方法) (torchrl.modules.llm.TransformersWrapper 方法) (torchrl.modules.llm.vLLMWrapper 方法) register_forward_hook() (torchrl.data.BinaryToDecimal 方法) (torchrl.data.DensifyReward 方法) (torchrl.data.HashToInt 方法) (torchrl.data.llm.TopKRewardSelector 方法) (torchrl.data.MultiStep 方法) (torchrl.data.QueryModule 方法) (torchrl.data.RandomProjectionHash 方法) (torchrl.data.SipHash 方法) (torchrl.data.TensorDictMap 方法) (torchrl.envs.AsyncEnvPool 方法) (torchrl.envs.ChessEnv 方法) (torchrl.envs.EnvBase 方法) (torchrl.envs.GymLikeEnv 方法) (torchrl.envs.llm.ChatEnv 方法) (torchrl.envs.llm.DatasetChatEnv 方法) (torchrl.envs.llm.GSM8KEnv 方法) (torchrl.envs.llm.GSM8KPrepareQuestion 方法) (torchrl.envs.llm.GSM8KRewardParser 方法) (torchrl.envs.llm.IFEvalEnv 方法) (torchrl.envs.llm.IfEvalScorer 方法) (torchrl.envs.llm.LLMEnv 方法) (torchrl.envs.llm.LLMHashingEnv 方法) (torchrl.envs.llm.MLGymWrapper 方法) (torchrl.envs.llm.transforms.AddThinkingPrompt 方法) (torchrl.envs.llm.transforms.BrowserTransform 方法) (torchrl.envs.llm.transforms.DataLoadingPrimer 方法) (torchrl.envs.llm.transforms.KLComputation 方法) (torchrl.envs.llm.transforms.KLRewardTransform 方法) (torchrl.envs.llm.transforms.MCPToolTransform 方法) (torchrl.envs.llm.transforms.PolicyVersion 方法) (torchrl.envs.llm.transforms.PythonInterpreter 方法) (torchrl.envs.llm.transforms.RayDataLoadingPrimer 方法) (torchrl.envs.llm.transforms.RetrieveKL 方法) (torchrl.envs.llm.transforms.RetrieveLogProb 方法) (torchrl.envs.llm.transforms.TemplateTransform 方法) (torchrl.envs.llm.transforms.Tokenizer 方法) (torchrl.envs.LLMHashingEnv 方法) (torchrl.envs.ParallelEnv 方法) (torchrl.envs.PendulumEnv 方法) (torchrl.envs.ProcessorAsyncEnvPool 方法) (torchrl.envs.SerialEnv 方法) (torchrl.envs.ThreadingAsyncEnvPool 方法) (torchrl.envs.TicTacToeEnv 方法) (torchrl.envs.transforms.rb_transforms.MultiStepTransform 方法) (torchrl.modules.llm.LLMWrapperBase 方法) (torchrl.modules.llm.TransformersWrapper 方法) (torchrl.modules.llm.vLLMWrapper 方法) (torchrl.objectives.llm.GRPOLoss 方法) (torchrl.objectives.llm.MCAdvantage 方法) (torchrl.objectives.llm.SFTLoss 方法) register_forward_pre_hook() (torchrl.data.BinaryToDecimal 方法) (torchrl.data.DensifyReward 方法) (torchrl.data.HashToInt 方法) (torchrl.data.llm.TopKRewardSelector 方法) (torchrl.data.MultiStep 方法) (torchrl.data.QueryModule 方法) (torchrl.data.RandomProjectionHash 方法) (torchrl.data.SipHash 方法) (torchrl.data.TensorDictMap 方法) (torchrl.envs.AsyncEnvPool 方法) (torchrl.envs.ChessEnv 方法) (torchrl.envs.EnvBase 方法) (torchrl.envs.GymLikeEnv 方法) (torchrl.envs.llm.ChatEnv 方法) (torchrl.envs.llm.DatasetChatEnv 方法) (torchrl.envs.llm.GSM8KEnv 方法) (torchrl.envs.llm.GSM8KPrepareQuestion 方法) (torchrl.envs.llm.GSM8KRewardParser 方法) (torchrl.envs.llm.IFEvalEnv 方法) (torchrl.envs.llm.IfEvalScorer 方法) (torchrl.envs.llm.LLMEnv 方法) (torchrl.envs.llm.LLMHashingEnv 方法) (torchrl.envs.llm.MLGymWrapper 方法) (torchrl.envs.llm.transforms.AddThinkingPrompt 方法) (torchrl.envs.llm.transforms.BrowserTransform 方法) (torchrl.envs.llm.transforms.DataLoadingPrimer 方法) (torchrl.envs.llm.transforms.KLComputation 方法) (torchrl.envs.llm.transforms.KLRewardTransform 方法) (torchrl.envs.llm.transforms.MCPToolTransform 方法) (torchrl.envs.llm.transforms.PolicyVersion 方法) (torchrl.envs.llm.transforms.PythonInterpreter 方法) (torchrl.envs.llm.transforms.RayDataLoadingPrimer 方法) (torchrl.envs.llm.transforms.RetrieveKL 方法) (torchrl.envs.llm.transforms.RetrieveLogProb 方法) (torchrl.envs.llm.transforms.TemplateTransform 方法) (torchrl.envs.llm.transforms.Tokenizer 方法) (torchrl.envs.LLMHashingEnv 方法) (torchrl.envs.ParallelEnv 方法) (torchrl.envs.PendulumEnv 方法) (torchrl.envs.ProcessorAsyncEnvPool 方法) (torchrl.envs.SerialEnv 方法) (torchrl.envs.ThreadingAsyncEnvPool 方法) (torchrl.envs.TicTacToeEnv 方法) (torchrl.envs.transforms.rb_transforms.MultiStepTransform 方法) (torchrl.modules.llm.LLMWrapperBase 方法) (torchrl.modules.llm.TransformersWrapper 方法) (torchrl.modules.llm.vLLMWrapper 方法) (torchrl.objectives.llm.GRPOLoss 方法) (torchrl.objectives.llm.MCAdvantage 方法) (torchrl.objectives.llm.SFTLoss 方法) register_full_backward_hook() (torchrl.data.BinaryToDecimal 方法) (torchrl.data.DensifyReward 方法) (torchrl.data.HashToInt 方法) (torchrl.data.llm.TopKRewardSelector 方法) (torchrl.data.MultiStep 方法) (torchrl.data.QueryModule 方法) (torchrl.data.RandomProjectionHash 方法) (torchrl.data.SipHash 方法) (torchrl.data.TensorDictMap 方法) (torchrl.envs.AsyncEnvPool 方法) (torchrl.envs.ChessEnv 方法) (torchrl.envs.EnvBase 方法) (torchrl.envs.GymLikeEnv 方法) (torchrl.envs.llm.ChatEnv 方法) (torchrl.envs.llm.DatasetChatEnv 方法) (torchrl.envs.llm.GSM8KEnv 方法) (torchrl.envs.llm.GSM8KPrepareQuestion 方法) (torchrl.envs.llm.GSM8KRewardParser 方法) (torchrl.envs.llm.IFEvalEnv 方法) (torchrl.envs.llm.IfEvalScorer 方法) (torchrl.envs.llm.LLMEnv 方法) (torchrl.envs.llm.LLMHashingEnv 方法) (torchrl.envs.llm.MLGymWrapper 方法) (torchrl.envs.llm.transforms.AddThinkingPrompt 方法) (torchrl.envs.llm.transforms.BrowserTransform 方法) (torchrl.envs.llm.transforms.DataLoadingPrimer 方法) (torchrl.envs.llm.transforms.KLComputation 方法) (torchrl.envs.llm.transforms.KLRewardTransform 方法) (torchrl.envs.llm.transforms.MCPToolTransform 方法) (torchrl.envs.llm.transforms.PolicyVersion 方法) (torchrl.envs.llm.transforms.PythonInterpreter 方法) (torchrl.envs.llm.transforms.RayDataLoadingPrimer 方法) (torchrl.envs.llm.transforms.RetrieveKL 方法) (torchrl.envs.llm.transforms.RetrieveLogProb 方法) (torchrl.envs.llm.transforms.TemplateTransform 方法) (torchrl.envs.llm.transforms.Tokenizer 方法) (torchrl.envs.LLMHashingEnv 方法) (torchrl.envs.ParallelEnv 方法) (torchrl.envs.PendulumEnv 方法) (torchrl.envs.ProcessorAsyncEnvPool 方法) (torchrl.envs.SerialEnv 方法) (torchrl.envs.ThreadingAsyncEnvPool 方法) (torchrl.envs.TicTacToeEnv 方法) (torchrl.envs.transforms.rb_transforms.MultiStepTransform 方法) (torchrl.modules.llm.LLMWrapperBase 方法) (torchrl.modules.llm.TransformersWrapper 方法) (torchrl.modules.llm.vLLMWrapper 方法) (torchrl.objectives.llm.GRPOLoss 方法) (torchrl.objectives.llm.MCAdvantage 方法) (torchrl.objectives.llm.SFTLoss 方法) register_full_backward_pre_hook() (torchrl.data.BinaryToDecimal 方法) (torchrl.data.DensifyReward 方法) (torchrl.data.HashToInt 方法) (torchrl.data.llm.TopKRewardSelector 方法) (torchrl.data.MultiStep 方法) (torchrl.data.QueryModule 方法) (torchrl.data.RandomProjectionHash 方法) (torchrl.data.SipHash 方法) (torchrl.data.TensorDictMap 方法) (torchrl.envs.AsyncEnvPool 方法) (torchrl.envs.ChessEnv 方法) (torchrl.envs.EnvBase 方法) (torchrl.envs.GymLikeEnv 方法) (torchrl.envs.llm.ChatEnv 方法) (torchrl.envs.llm.DatasetChatEnv 方法) (torchrl.envs.llm.GSM8KEnv 方法) (torchrl.envs.llm.GSM8KPrepareQuestion 方法) (torchrl.envs.llm.GSM8KRewardParser 方法) (torchrl.envs.llm.IFEvalEnv 方法) (torchrl.envs.llm.IfEvalScorer 方法) (torchrl.envs.llm.LLMEnv 方法) (torchrl.envs.llm.LLMHashingEnv 方法) (torchrl.envs.llm.MLGymWrapper 方法) (torchrl.envs.llm.transforms.AddThinkingPrompt 方法) (torchrl.envs.llm.transforms.BrowserTransform 方法) (torchrl.envs.llm.transforms.DataLoadingPrimer 方法) (torchrl.envs.llm.transforms.KLComputation 方法) (torchrl.envs.llm.transforms.KLRewardTransform 方法) (torchrl.envs.llm.transforms.MCPToolTransform 方法) (torchrl.envs.llm.transforms.PolicyVersion 方法) (torchrl.envs.llm.transforms.PythonInterpreter 方法) (torchrl.envs.llm.transforms.RayDataLoadingPrimer 方法) (torchrl.envs.llm.transforms.RetrieveKL 方法) (torchrl.envs.llm.transforms.RetrieveLogProb 方法) (torchrl.envs.llm.transforms.TemplateTransform 方法) (torchrl.envs.llm.transforms.Tokenizer 方法) (torchrl.envs.LLMHashingEnv 方法) (torchrl.envs.ParallelEnv 方法) (torchrl.envs.PendulumEnv 方法) (torchrl.envs.ProcessorAsyncEnvPool 方法) (torchrl.envs.SerialEnv 方法) (torchrl.envs.ThreadingAsyncEnvPool 方法) (torchrl.envs.TicTacToeEnv 方法) (torchrl.envs.transforms.rb_transforms.MultiStepTransform 方法) (torchrl.modules.llm.LLMWrapperBase 方法) (torchrl.modules.llm.TransformersWrapper 方法) (torchrl.modules.llm.vLLMWrapper 方法) (torchrl.objectives.llm.GRPOLoss 方法) (torchrl.objectives.llm.MCAdvantage 方法) (torchrl.objectives.llm.SFTLoss 方法) register_gym() (torchrl.envs.AsyncEnvPool 類方法) (torchrl.envs.ChessEnv 類方法) (torchrl.envs.EnvBase 類方法) (torchrl.envs.GymLikeEnv 類方法) (torchrl.envs.llm.ChatEnv 類方法) (torchrl.envs.llm.DatasetChatEnv 類方法) (torchrl.envs.llm.GSM8KEnv 類方法) (torchrl.envs.llm.IFEvalEnv 類方法) (torchrl.envs.llm.LLMEnv 類方法) (torchrl.envs.llm.LLMHashingEnv 類方法) (torchrl.envs.llm.MLGymWrapper 類方法) (torchrl.envs.LLMHashingEnv 類方法) (torchrl.envs.ParallelEnv 類方法) (torchrl.envs.PendulumEnv 類方法) (torchrl.envs.ProcessorAsyncEnvPool 類方法) (torchrl.envs.SerialEnv 類方法) (torchrl.envs.ThreadingAsyncEnvPool 類方法) (torchrl.envs.TicTacToeEnv 類方法) register_gym_spec_conversion() (位於模組 torchrl.envs) register_keys() (torchrl.envs.transforms.EndOfLifeTransform 方法) register_load_hook() (torchrl.data.datasets.AtariDQNExperienceReplay 方法) (torchrl.data.datasets.BaseDatasetExperienceReplay 方法) (torchrl.data.datasets.D4RLExperienceReplay 方法) (torchrl.data.datasets.GenDGRLExperienceReplay 方法) (torchrl.data.datasets.MinariExperienceReplay 方法) (torchrl.data.datasets.OpenMLExperienceReplay 方法) (torchrl.data.datasets.OpenXExperienceReplay 方法) (torchrl.data.datasets.RobosetExperienceReplay 方法) (torchrl.data.datasets.VD4RLExperienceReplay 方法) (torchrl.data.PrioritizedReplayBuffer 方法) (torchrl.data.RayReplayBuffer 方法) (torchrl.data.RemoteTensorDictReplayBuffer 方法) (torchrl.data.replay_buffers.ReplayBufferEnsemble 方法) (torchrl.data.ReplayBuffer 方法) (torchrl.data.TensorDictPrioritizedReplayBuffer 方法) (torchrl.data.TensorDictReplayBuffer 方法) register_load_state_dict_post_hook() (torchrl.data.BinaryToDecimal 方法) (torchrl.data.DensifyReward 方法) (torchrl.data.HashToInt 方法) (torchrl.data.llm.TopKRewardSelector 方法) (torchrl.data.MultiStep 方法) (torchrl.data.QueryModule 方法) (torchrl.data.RandomProjectionHash 方法) (torchrl.data.SipHash 方法) (torchrl.data.TensorDictMap 方法) (torchrl.envs.AsyncEnvPool 方法) (torchrl.envs.ChessEnv 方法) (torchrl.envs.EnvBase 方法) (torchrl.envs.GymLikeEnv 方法) (torchrl.envs.llm.ChatEnv 方法) (torchrl.envs.llm.DatasetChatEnv 方法) (torchrl.envs.llm.GSM8KEnv 方法) (torchrl.envs.llm.GSM8KPrepareQuestion 方法) (torchrl.envs.llm.GSM8KRewardParser 方法) (torchrl.envs.llm.IFEvalEnv 方法) (torchrl.envs.llm.IfEvalScorer 方法) (torchrl.envs.llm.LLMEnv 方法) (torchrl.envs.llm.LLMHashingEnv 方法) (torchrl.envs.llm.MLGymWrapper 方法) (torchrl.envs.llm.transforms.AddThinkingPrompt 方法) (torchrl.envs.llm.transforms.BrowserTransform 方法) (torchrl.envs.llm.transforms.DataLoadingPrimer 方法) (torchrl.envs.llm.transforms.KLComputation 方法) (torchrl.envs.llm.transforms.KLRewardTransform 方法) (torchrl.envs.llm.transforms.MCPToolTransform 方法) (torchrl.envs.llm.transforms.PolicyVersion 方法) (torchrl.envs.llm.transforms.PythonInterpreter 方法) (torchrl.envs.llm.transforms.RayDataLoadingPrimer 方法) (torchrl.envs.llm.transforms.RetrieveKL 方法) (torchrl.envs.llm.transforms.RetrieveLogProb 方法) (torchrl.envs.llm.transforms.TemplateTransform 方法) (torchrl.envs.llm.transforms.Tokenizer 方法) (torchrl.envs.LLMHashingEnv 方法) (torchrl.envs.ParallelEnv 方法) (torchrl.envs.PendulumEnv 方法) (torchrl.envs.ProcessorAsyncEnvPool 方法) (torchrl.envs.SerialEnv 方法) (torchrl.envs.ThreadingAsyncEnvPool 方法) (torchrl.envs.TicTacToeEnv 方法) (torchrl.envs.transforms.rb_transforms.MultiStepTransform 方法) (torchrl.modules.llm.LLMWrapperBase 方法) (torchrl.modules.llm.TransformersWrapper 方法) (torchrl.modules.llm.vLLMWrapper 方法) (torchrl.objectives.llm.GRPOLoss 方法) (torchrl.objectives.llm.MCAdvantage 方法) (torchrl.objectives.llm.SFTLoss 方法) register_load_state_dict_pre_hook() (torchrl.data.BinaryToDecimal 方法) (torchrl.data.DensifyReward 方法) (torchrl.data.HashToInt 方法) (torchrl.data.llm.TopKRewardSelector 方法) (torchrl.data.MultiStep 方法) (torchrl.data.QueryModule 方法) (torchrl.data.RandomProjectionHash 方法) (torchrl.data.SipHash 方法) (torchrl.data.TensorDictMap 方法) (torchrl.envs.AsyncEnvPool 方法) (torchrl.envs.ChessEnv 方法) (torchrl.envs.EnvBase 方法) (torchrl.envs.GymLikeEnv 方法) (torchrl.envs.llm.ChatEnv 方法) (torchrl.envs.llm.DatasetChatEnv 方法) (torchrl.envs.llm.GSM8KEnv 方法) (torchrl.envs.llm.GSM8KPrepareQuestion 方法) (torchrl.envs.llm.GSM8KRewardParser 方法) (torchrl.envs.llm.IFEvalEnv 方法) (torchrl.envs.llm.IfEvalScorer 方法) (torchrl.envs.llm.LLMEnv 方法) (torchrl.envs.llm.LLMHashingEnv 方法) (torchrl.envs.llm.MLGymWrapper 方法) (torchrl.envs.llm.transforms.AddThinkingPrompt 方法) (torchrl.envs.llm.transforms.BrowserTransform 方法) (torchrl.envs.llm.transforms.DataLoadingPrimer 方法) (torchrl.envs.llm.transforms.KLComputation 方法) (torchrl.envs.llm.transforms.KLRewardTransform 方法) (torchrl.envs.llm.transforms.MCPToolTransform 方法) (torchrl.envs.llm.transforms.PolicyVersion 方法) (torchrl.envs.llm.transforms.PythonInterpreter 方法) (torchrl.envs.llm.transforms.RayDataLoadingPrimer 方法) (torchrl.envs.llm.transforms.RetrieveKL 方法) (torchrl.envs.llm.transforms.RetrieveLogProb 方法) (torchrl.envs.llm.transforms.TemplateTransform 方法) (torchrl.envs.llm.transforms.Tokenizer 方法) (torchrl.envs.LLMHashingEnv 方法) (torchrl.envs.ParallelEnv 方法) (torchrl.envs.PendulumEnv 方法) (torchrl.envs.ProcessorAsyncEnvPool 方法) (torchrl.envs.SerialEnv 方法) (torchrl.envs.ThreadingAsyncEnvPool 方法) (torchrl.envs.TicTacToeEnv 方法) (torchrl.envs.transforms.rb_transforms.MultiStepTransform 方法) (torchrl.modules.llm.LLMWrapperBase 方法) (torchrl.modules.llm.TransformersWrapper 方法) (torchrl.modules.llm.vLLMWrapper 方法) (torchrl.objectives.llm.GRPOLoss 方法) (torchrl.objectives.llm.MCAdvantage 方法) (torchrl.objectives.llm.SFTLoss 方法) register_module() (torchrl.data.BinaryToDecimal 方法) (torchrl.data.DensifyReward 方法) (torchrl.data.HashToInt 方法) (torchrl.data.llm.TopKRewardSelector 方法) (torchrl.data.MultiStep 方法) (torchrl.data.QueryModule 方法) (torchrl.data.RandomProjectionHash 方法) (torchrl.data.SipHash 方法) (torchrl.data.TensorDictMap 方法) (torchrl.envs.AsyncEnvPool 方法) (torchrl.envs.ChessEnv 方法) (torchrl.envs.EnvBase 方法) (torchrl.envs.GymLikeEnv 方法) (torchrl.envs.llm.ChatEnv 方法) (torchrl.envs.llm.DatasetChatEnv 方法) (torchrl.envs.llm.GSM8KEnv 方法) (torchrl.envs.llm.GSM8KPrepareQuestion 方法) (torchrl.envs.llm.GSM8KRewardParser 方法) (torchrl.envs.llm.IFEvalEnv 方法) (torchrl.envs.llm.IfEvalScorer 方法) (torchrl.envs.llm.LLMEnv 方法) (torchrl.envs.llm.LLMHashingEnv 方法) (torchrl.envs.llm.MLGymWrapper 方法) (torchrl.envs.llm.transforms.AddThinkingPrompt 方法) (torchrl.envs.llm.transforms.BrowserTransform 方法) (torchrl.envs.llm.transforms.DataLoadingPrimer 方法) (torchrl.envs.llm.transforms.KLComputation 方法) (torchrl.envs.llm.transforms.KLRewardTransform 方法) (torchrl.envs.llm.transforms.MCPToolTransform 方法) (torchrl.envs.llm.transforms.PolicyVersion 方法) (torchrl.envs.llm.transforms.PythonInterpreter 方法) (torchrl.envs.llm.transforms.RayDataLoadingPrimer 方法) (torchrl.envs.llm.transforms.RetrieveKL 方法) (torchrl.envs.llm.transforms.RetrieveLogProb 方法) (torchrl.envs.llm.transforms.TemplateTransform 方法) (torchrl.envs.llm.transforms.Tokenizer 方法) (torchrl.envs.LLMHashingEnv 方法) (torchrl.envs.ParallelEnv 方法) (torchrl.envs.PendulumEnv 方法) (torchrl.envs.ProcessorAsyncEnvPool 方法) (torchrl.envs.SerialEnv 方法) (torchrl.envs.ThreadingAsyncEnvPool 方法) (torchrl.envs.TicTacToeEnv 方法) (torchrl.envs.transforms.rb_transforms.MultiStepTransform 方法) (torchrl.modules.llm.LLMWrapperBase 方法) (torchrl.modules.llm.TransformersWrapper 方法) (torchrl.modules.llm.vLLMWrapper 方法) (torchrl.objectives.llm.GRPOLoss 方法) (torchrl.objectives.llm.MCAdvantage 方法) (torchrl.objectives.llm.SFTLoss 方法) register_parameter() (torchrl.data.BinaryToDecimal 方法) (torchrl.data.DensifyReward 方法) (torchrl.data.HashToInt 方法) (torchrl.data.llm.TopKRewardSelector 方法) (torchrl.data.MultiStep 方法) (torchrl.data.QueryModule 方法) (torchrl.data.RandomProjectionHash 方法) (torchrl.data.SipHash 方法) (torchrl.data.TensorDictMap 方法) (torchrl.envs.AsyncEnvPool 方法) (torchrl.envs.ChessEnv 方法) (torchrl.envs.EnvBase 方法) (torchrl.envs.GymLikeEnv 方法) (torchrl.envs.llm.ChatEnv 方法) (torchrl.envs.llm.DatasetChatEnv 方法) (torchrl.envs.llm.GSM8KEnv 方法) (torchrl.envs.llm.GSM8KPrepareQuestion 方法) (torchrl.envs.llm.GSM8KRewardParser 方法) (torchrl.envs.llm.IFEvalEnv 方法) (torchrl.envs.llm.IfEvalScorer 方法) (torchrl.envs.llm.LLMEnv 方法) (torchrl.envs.llm.LLMHashingEnv 方法) (torchrl.envs.llm.MLGymWrapper 方法) (torchrl.envs.llm.transforms.AddThinkingPrompt 方法) (torchrl.envs.llm.transforms.BrowserTransform 方法) (torchrl.envs.llm.transforms.DataLoadingPrimer 方法) (torchrl.envs.llm.transforms.KLComputation 方法) (torchrl.envs.llm.transforms.KLRewardTransform 方法) (torchrl.envs.llm.transforms.MCPToolTransform 方法) (torchrl.envs.llm.transforms.PolicyVersion 方法) (torchrl.envs.llm.transforms.PythonInterpreter 方法) (torchrl.envs.llm.transforms.RayDataLoadingPrimer 方法) (torchrl.envs.llm.transforms.RetrieveKL 方法) (torchrl.envs.llm.transforms.RetrieveLogProb 方法) (torchrl.envs.llm.transforms.TemplateTransform 方法) (torchrl.envs.llm.transforms.Tokenizer 方法) (torchrl.envs.LLMHashingEnv 方法) (torchrl.envs.ParallelEnv 方法) (torchrl.envs.PendulumEnv 方法) (torchrl.envs.ProcessorAsyncEnvPool 方法) (torchrl.envs.SerialEnv 方法) (torchrl.envs.ThreadingAsyncEnvPool 方法) (torchrl.envs.TicTacToeEnv 方法) (torchrl.envs.transforms.rb_transforms.MultiStepTransform 方法) (torchrl.modules.llm.LLMWrapperBase 方法) (torchrl.modules.llm.TransformersWrapper 方法) (torchrl.modules.llm.vLLMWrapper 方法) (torchrl.objectives.llm.GRPOLoss 方法) (torchrl.objectives.llm.MCAdvantage 方法) (torchrl.objectives.llm.SFTLoss 方法) register_post_hook() (torchrl.collectors.distributed.DistributedWeightUpdater 方法) (torchrl.collectors.distributed.RPCWeightUpdater 方法) (torchrl.collectors.llm.vLLMUpdater 方法) (torchrl.collectors.llm.vLLMUpdaterV2 方法) (torchrl.collectors.MultiProcessedWeightUpdater 方法) (torchrl.collectors.RayWeightUpdater 方法) (torchrl.collectors.VanillaWeightUpdater 方法) (torchrl.collectors.WeightUpdaterBase 方法) register_save_hook() (torchrl.data.datasets.AtariDQNExperienceReplay 方法) (torchrl.data.datasets.BaseDatasetExperienceReplay 方法) (torchrl.data.datasets.D4RLExperienceReplay 方法) (torchrl.data.datasets.GenDGRLExperienceReplay 方法) (torchrl.data.datasets.MinariExperienceReplay 方法) (torchrl.data.datasets.OpenMLExperienceReplay 方法) (torchrl.data.datasets.OpenXExperienceReplay 方法) (torchrl.data.datasets.RobosetExperienceReplay 方法) (torchrl.data.datasets.VD4RLExperienceReplay 方法) (torchrl.data.PrioritizedReplayBuffer 方法) (torchrl.data.RayReplayBuffer 方法) (torchrl.data.RemoteTensorDictReplayBuffer 方法) (torchrl.data.replay_buffers.ReplayBufferEnsemble 方法) (torchrl.data.ReplayBuffer 方法) (torchrl.data.TensorDictPrioritizedReplayBuffer 方法) (torchrl.data.TensorDictReplayBuffer 方法) register_state_dict_post_hook() (torchrl.data.BinaryToDecimal 方法) (torchrl.data.DensifyReward 方法) (torchrl.data.HashToInt 方法) (torchrl.data.llm.TopKRewardSelector 方法) (torchrl.data.MultiStep 方法) (torchrl.data.QueryModule 方法) (torchrl.data.RandomProjectionHash 方法) (torchrl.data.SipHash 方法) (torchrl.data.TensorDictMap 方法) (torchrl.envs.AsyncEnvPool 方法) (torchrl.envs.ChessEnv 方法) (torchrl.envs.EnvBase 方法) (torchrl.envs.GymLikeEnv 方法) (torchrl.envs.llm.ChatEnv 方法) (torchrl.envs.llm.DatasetChatEnv 方法) (torchrl.envs.llm.GSM8KEnv 方法) (torchrl.envs.llm.GSM8KPrepareQuestion 方法) (torchrl.envs.llm.GSM8KRewardParser 方法) (torchrl.envs.llm.IFEvalEnv 方法) (torchrl.envs.llm.IfEvalScorer 方法) (torchrl.envs.llm.LLMEnv 方法) (torchrl.envs.llm.LLMHashingEnv 方法) (torchrl.envs.llm.MLGymWrapper 方法) (torchrl.envs.llm.transforms.AddThinkingPrompt 方法) (torchrl.envs.llm.transforms.BrowserTransform 方法) (torchrl.envs.llm.transforms.DataLoadingPrimer 方法) (torchrl.envs.llm.transforms.KLComputation 方法) (torchrl.envs.llm.transforms.KLRewardTransform 方法) (torchrl.envs.llm.transforms.MCPToolTransform 方法) (torchrl.envs.llm.transforms.PolicyVersion 方法) (torchrl.envs.llm.transforms.PythonInterpreter 方法) (torchrl.envs.llm.transforms.RayDataLoadingPrimer 方法) (torchrl.envs.llm.transforms.RetrieveKL 方法) (torchrl.envs.llm.transforms.RetrieveLogProb 方法) (torchrl.envs.llm.transforms.TemplateTransform 方法) (torchrl.envs.llm.transforms.Tokenizer 方法) (torchrl.envs.LLMHashingEnv 方法) (torchrl.envs.ParallelEnv 方法) (torchrl.envs.PendulumEnv 方法) (torchrl.envs.ProcessorAsyncEnvPool 方法) (torchrl.envs.SerialEnv 方法) (torchrl.envs.ThreadingAsyncEnvPool 方法) (torchrl.envs.TicTacToeEnv 方法) (torchrl.envs.transforms.rb_transforms.MultiStepTransform 方法) (torchrl.modules.llm.LLMWrapperBase 方法) (torchrl.modules.llm.TransformersWrapper 方法) (torchrl.modules.llm.vLLMWrapper 方法) (torchrl.objectives.llm.GRPOLoss 方法) (torchrl.objectives.llm.MCAdvantage 方法) (torchrl.objectives.llm.SFTLoss 方法) register_state_dict_pre_hook() (torchrl.data.BinaryToDecimal 方法) (torchrl.data.DensifyReward 方法) (torchrl.data.HashToInt 方法) (torchrl.data.llm.TopKRewardSelector 方法) (torchrl.data.MultiStep 方法) (torchrl.data.QueryModule 方法) (torchrl.data.RandomProjectionHash 方法) (torchrl.data.SipHash 方法) (torchrl.data.TensorDictMap 方法) (torchrl.envs.AsyncEnvPool 方法) (torchrl.envs.ChessEnv 方法) (torchrl.envs.EnvBase 方法) (torchrl.envs.GymLikeEnv 方法) (torchrl.envs.llm.ChatEnv 方法) (torchrl.envs.llm.DatasetChatEnv 方法) (torchrl.envs.llm.GSM8KEnv 方法) (torchrl.envs.llm.GSM8KPrepareQuestion 方法) (torchrl.envs.llm.GSM8KRewardParser 方法) (torchrl.envs.llm.IFEvalEnv 方法) (torchrl.envs.llm.IfEvalScorer 方法) (torchrl.envs.llm.LLMEnv 方法) (torchrl.envs.llm.LLMHashingEnv 方法) (torchrl.envs.llm.MLGymWrapper 方法) (torchrl.envs.llm.transforms.AddThinkingPrompt 方法) (torchrl.envs.llm.transforms.BrowserTransform 方法) (torchrl.envs.llm.transforms.DataLoadingPrimer 方法) (torchrl.envs.llm.transforms.KLComputation 方法) (torchrl.envs.llm.transforms.KLRewardTransform 方法) (torchrl.envs.llm.transforms.MCPToolTransform 方法) (torchrl.envs.llm.transforms.PolicyVersion 方法) (torchrl.envs.llm.transforms.PythonInterpreter 方法) (torchrl.envs.llm.transforms.RayDataLoadingPrimer 方法) (torchrl.envs.llm.transforms.RetrieveKL 方法) (torchrl.envs.llm.transforms.RetrieveLogProb 方法) (torchrl.envs.llm.transforms.TemplateTransform 方法) (torchrl.envs.llm.transforms.Tokenizer 方法) (torchrl.envs.LLMHashingEnv 方法) (torchrl.envs.ParallelEnv 方法) (torchrl.envs.PendulumEnv 方法) (torchrl.envs.ProcessorAsyncEnvPool 方法) (torchrl.envs.SerialEnv 方法) (torchrl.envs.ThreadingAsyncEnvPool 方法) (torchrl.envs.TicTacToeEnv 方法) (torchrl.envs.transforms.rb_transforms.MultiStepTransform 方法) (torchrl.modules.llm.LLMWrapperBase 方法) (torchrl.modules.llm.TransformersWrapper 方法) (torchrl.modules.llm.vLLMWrapper 方法) (torchrl.objectives.llm.GRPOLoss 方法) (torchrl.objectives.llm.MCAdvantage 方法) (torchrl.objectives.llm.SFTLoss 方法) ReinforceLoss (類,位於 torchrl.objectives) remote_collectors (torchrl.collectors.distributed.RayCollector 屬性) RemoteTensorDictReplayBuffer (類,位於 torchrl.data) RemoteTransformersWrapper (類,位於 torchrl.modules.llm) RemoveEmptySpecs (類,位於 torchrl.envs.transforms) RemoveEmptySpecsConfig (類,位於 torchrl.trainers.algorithms.configs.transforms) RenameTransform (類,位於 torchrl.envs.transforms) RenameTransformConfig (類,位於 torchrl.trainers.algorithms.configs.transforms) repeat_interleave_causal() (torchrl.modules.llm.TransformersWrapper 方法) repeats (torchrl.envs.llm.transforms.RayDataLoadingPrimer 屬性) ReplayBuffer (類,位於 torchrl.data) ReplayBufferConfig (類,位於 torchrl.trainers.algorithms.configs.data) ReplayBufferEnsemble (類,位於 torchrl.data.replay_buffers) ReplayBufferTrainer (類,位於 torchrl.trainers) reproducible_hash() (torchrl.envs.transforms.Hash 類方法) requires_grad_() (torchrl.data.BinaryToDecimal 方法) (torchrl.data.DensifyReward 方法) (torchrl.data.HashToInt 方法) (torchrl.data.llm.TopKRewardSelector 方法) (torchrl.data.MultiStep 方法) (torchrl.data.QueryModule 方法) (torchrl.data.RandomProjectionHash 方法) (torchrl.data.SipHash 方法) (torchrl.data.TensorDictMap 方法) (torchrl.envs.AsyncEnvPool 方法) (torchrl.envs.ChessEnv 方法) (torchrl.envs.EnvBase 方法) (torchrl.envs.GymLikeEnv 方法) (torchrl.envs.llm.ChatEnv 方法) (torchrl.envs.llm.DatasetChatEnv 方法) (torchrl.envs.llm.GSM8KEnv 方法) (torchrl.envs.llm.GSM8KPrepareQuestion 方法) (torchrl.envs.llm.GSM8KRewardParser 方法) (torchrl.envs.llm.IFEvalEnv 方法) (torchrl.envs.llm.IfEvalScorer 方法) (torchrl.envs.llm.LLMEnv 方法) (torchrl.envs.llm.LLMHashingEnv 方法) (torchrl.envs.llm.MLGymWrapper 方法) (torchrl.envs.llm.transforms.AddThinkingPrompt 方法) (torchrl.envs.llm.transforms.BrowserTransform 方法) (torchrl.envs.llm.transforms.DataLoadingPrimer 方法) (torchrl.envs.llm.transforms.KLComputation 方法) (torchrl.envs.llm.transforms.KLRewardTransform 方法) (torchrl.envs.llm.transforms.MCPToolTransform 方法) (torchrl.envs.llm.transforms.PolicyVersion 方法) (torchrl.envs.llm.transforms.PythonInterpreter 方法) (torchrl.envs.llm.transforms.RayDataLoadingPrimer 方法) (torchrl.envs.llm.transforms.RetrieveKL 方法) (torchrl.envs.llm.transforms.RetrieveLogProb 方法) (torchrl.envs.llm.transforms.TemplateTransform 方法) (torchrl.envs.llm.transforms.Tokenizer 方法) (torchrl.envs.LLMHashingEnv 方法) (torchrl.envs.ParallelEnv 方法) (torchrl.envs.PendulumEnv 方法) (torchrl.envs.ProcessorAsyncEnvPool 方法) (torchrl.envs.SerialEnv 方法) (torchrl.envs.ThreadingAsyncEnvPool 方法) (torchrl.envs.TicTacToeEnv 方法) (torchrl.envs.transforms.rb_transforms.MultiStepTransform 方法) (torchrl.modules.llm.LLMWrapperBase 方法) (torchrl.modules.llm.TransformersWrapper 方法) (torchrl.modules.llm.vLLMWrapper 方法) (torchrl.objectives.llm.GRPOLoss 方法) (torchrl.objectives.llm.MCAdvantage 方法) (torchrl.objectives.llm.SFTLoss 方法) reset() (位於模組 torchrl.envs) (torchrl.collectors.aSyncDataCollector 方法) (torchrl.collectors.llm.LLMCollector 方法) (torchrl.collectors.llm.RayLLMCollector 方法) (torchrl.collectors.MultiaSyncDataCollector 方法) (torchrl.collectors.MultiSyncDataCollector 方法) (torchrl.collectors.SyncDataCollector 方法) (torchrl.envs.AsyncEnvPool 方法) (torchrl.envs.ChessEnv 方法) (torchrl.envs.EnvBase 方法), [1] (torchrl.envs.GymLikeEnv 方法) (torchrl.envs.llm.ChatEnv 方法), [1] (torchrl.envs.llm.DatasetChatEnv 方法) (torchrl.envs.llm.GSM8KEnv 方法) (torchrl.envs.llm.IFEvalEnv 方法) (torchrl.envs.llm.LLMEnv 方法) (torchrl.envs.llm.LLMHashingEnv 方法) (torchrl.envs.llm.MLGymWrapper 方法) (torchrl.envs.LLMHashingEnv 方法) (torchrl.envs.ParallelEnv 方法) (torchrl.envs.PendulumEnv 方法) (torchrl.envs.ProcessorAsyncEnvPool 方法) (torchrl.envs.SerialEnv 方法) (torchrl.envs.ThreadingAsyncEnvPool 方法) (torchrl.envs.TicTacToeEnv 方法) (torchrl.implement_for 類方法) reset_dataloader() (torchrl.envs.llm.DatasetChatEnv 方法) (torchrl.envs.llm.GSM8KEnv 方法) (torchrl.envs.llm.IFEvalEnv 方法) (torchrl.envs.llm.transforms.DataLoadingPrimer 方法) (torchrl.envs.llm.transforms.RayDataLoadingPrimer 方法) reset_keys (torchrl.envs.AsyncEnvPool 屬性) (torchrl.envs.ChessEnv 屬性) (torchrl.envs.EnvBase 屬性) (torchrl.envs.GymLikeEnv 屬性) (torchrl.envs.llm.ChatEnv 屬性) (torchrl.envs.llm.DatasetChatEnv 屬性) (torchrl.envs.llm.GSM8KEnv 屬性) (torchrl.envs.llm.IFEvalEnv 屬性) (torchrl.envs.llm.LLMEnv 屬性) (torchrl.envs.llm.LLMHashingEnv 屬性) (torchrl.envs.llm.MLGymWrapper 屬性) (torchrl.envs.LLMHashingEnv 屬性) (torchrl.envs.ParallelEnv 屬性) (torchrl.envs.PendulumEnv 屬性) (torchrl.envs.ProcessorAsyncEnvPool 屬性) (torchrl.envs.SerialEnv 屬性) (torchrl.envs.ThreadingAsyncEnvPool 屬性) (torchrl.envs.TicTacToeEnv 屬性) reset_noise (類,位於 torchrl.modules) reset_out_keys() (torchrl.data.DensifyReward 方法) (torchrl.data.QueryModule 方法) (torchrl.data.TensorDictMap 方法) (torchrl.modules.llm.LLMWrapperBase 方法) (torchrl.modules.llm.TransformersWrapper 方法) (torchrl.modules.llm.vLLMWrapper 方法) (torchrl.objectives.llm.GRPOLoss 方法) (torchrl.objectives.llm.SFTLoss 方法) reset_parameters() (torchrl.modules.MultiAgentNetBase 方法) reset_parameters_recursive() (torchrl.data.DensifyReward 方法) (torchrl.data.QueryModule 方法) (torchrl.data.TensorDictMap 方法) (torchrl.modules.llm.LLMWrapperBase 方法) (torchrl.modules.llm.TransformersWrapper 方法) (torchrl.modules.llm.vLLMWrapper 方法) (torchrl.objectives.llm.GRPOLoss 方法) (torchrl.objectives.llm.SFTLoss 方法) (torchrl.objectives.LossModule 方法) reset_parent() (torchrl.envs.llm.transforms.RayDataLoadingPrimer 方法) (torchrl.envs.transforms.Transform 方法) reshape() (torchrl.data.Binary 方法) (torchrl.data.BinaryDiscreteTensorSpec 方法) (torchrl.data.Bounded 方法) (torchrl.data.BoundedTensorSpec 方法) (torchrl.data.Categorical 方法) (torchrl.data.Composite 方法) (torchrl.data.CompositeSpec 方法) (torchrl.data.DiscreteTensorSpec 方法) (torchrl.data.LazyStackedCompositeSpec 方法) (torchrl.data.LazyStackedTensorSpec 方法) (torchrl.data.MultiCategorical 方法) (torchrl.data.MultiDiscreteTensorSpec 方法) (torchrl.data.MultiOneHot 方法) (torchrl.data.MultiOneHotDiscreteTensorSpec 方法) (torchrl.data.NonTensor 方法) (torchrl.data.NonTensorSpec 方法) (torchrl.data.OneHot 方法) (torchrl.data.OneHotDiscreteTensorSpec 方法) (torchrl.data.Stacked 方法) (torchrl.data.StackedComposite 方法) (torchrl.data.TensorSpec 方法) (torchrl.data.Unbounded 方法) (torchrl.data.UnboundedContinuous 方法) (torchrl.data.UnboundedContinuousTensorSpec 方法) (torchrl.data.UnboundedDiscrete 方法) (torchrl.data.UnboundedDiscreteTensorSpec 方法) Resize (類,位於 torchrl.envs.transforms) ResizeConfig (類,位於 torchrl.trainers.algorithms.configs.transforms) RetrieveKL (類,位於 torchrl.envs.llm.transforms) RetrieveLogProb (類,位於 torchrl.envs.llm.transforms) reward2go (類,位於 torchrl.objectives.value.functional) Reward2GoTransform (類,位於 torchrl.envs.transforms) Reward2GoTransformConfig (類,位於 torchrl.trainers.algorithms.configs.transforms) reward_key (torchrl.envs.AsyncEnvPool 屬性) (torchrl.envs.ChessEnv 屬性) (torchrl.envs.EnvBase 屬性) (torchrl.envs.GymLikeEnv 屬性) (torchrl.envs.llm.ChatEnv 屬性) (torchrl.envs.llm.DatasetChatEnv 屬性) (torchrl.envs.llm.GSM8KEnv 屬性) (torchrl.envs.llm.IFEvalEnv 屬性) (torchrl.envs.llm.LLMEnv 屬性) (torchrl.envs.llm.LLMHashingEnv 屬性) (torchrl.envs.llm.MLGymWrapper 屬性) (torchrl.envs.LLMHashingEnv 屬性) (torchrl.envs.ParallelEnv 屬性) (torchrl.envs.PendulumEnv 屬性) (torchrl.envs.ProcessorAsyncEnvPool 屬性) (torchrl.envs.SerialEnv 屬性) (torchrl.envs.ThreadingAsyncEnvPool 屬性) (torchrl.envs.TicTacToeEnv 屬性) reward_keys (torchrl.data.MCTSForest 屬性) (torchrl.envs.AsyncEnvPool 屬性) (torchrl.envs.ChessEnv 屬性) (torchrl.envs.EnvBase 屬性) (torchrl.envs.GymLikeEnv 屬性) (torchrl.envs.llm.ChatEnv 屬性) (torchrl.envs.llm.DatasetChatEnv 屬性) (torchrl.envs.llm.GSM8KEnv 屬性) (torchrl.envs.llm.IFEvalEnv 屬性) (torchrl.envs.llm.LLMEnv 屬性) (torchrl.envs.llm.LLMHashingEnv 屬性) (torchrl.envs.llm.MLGymWrapper 屬性) (torchrl.envs.LLMHashingEnv 屬性) (torchrl.envs.ParallelEnv 屬性) (torchrl.envs.PendulumEnv 屬性) (torchrl.envs.ProcessorAsyncEnvPool 屬性) (torchrl.envs.SerialEnv 屬性) (torchrl.envs.ThreadingAsyncEnvPool 屬性) (torchrl.envs.TicTacToeEnv 屬性) reward_spec (torchrl.envs.AsyncEnvPool 屬性) (torchrl.envs.ChessEnv 屬性) (torchrl.envs.EnvBase 屬性) (torchrl.envs.GymLikeEnv 屬性) (torchrl.envs.llm.ChatEnv 屬性) (torchrl.envs.llm.DatasetChatEnv 屬性) (torchrl.envs.llm.GSM8KEnv 屬性) (torchrl.envs.llm.IFEvalEnv 屬性) (torchrl.envs.llm.LLMEnv 屬性) (torchrl.envs.llm.LLMHashingEnv 屬性) (torchrl.envs.llm.MLGymWrapper 屬性) (torchrl.envs.LLMHashingEnv 屬性) (torchrl.envs.ParallelEnv 屬性) (torchrl.envs.PendulumEnv 屬性) (torchrl.envs.ProcessorAsyncEnvPool 屬性) (torchrl.envs.SerialEnv 屬性) (torchrl.envs.ThreadingAsyncEnvPool 屬性) (torchrl.envs.TicTacToeEnv 屬性) reward_spec_unbatched (torchrl.envs.AsyncEnvPool 屬性) (torchrl.envs.ChessEnv 屬性) (torchrl.envs.EnvBase 屬性) (torchrl.envs.GymLikeEnv 屬性) (torchrl.envs.llm.ChatEnv 屬性) (torchrl.envs.llm.DatasetChatEnv 屬性) (torchrl.envs.llm.GSM8KEnv 屬性) (torchrl.envs.llm.IFEvalEnv 屬性) (torchrl.envs.llm.LLMEnv 屬性) (torchrl.envs.llm.LLMHashingEnv 屬性) (torchrl.envs.llm.MLGymWrapper 屬性) (torchrl.envs.LLMHashingEnv 屬性) (torchrl.envs.ParallelEnv 屬性) (torchrl.envs.PendulumEnv 屬性) (torchrl.envs.ProcessorAsyncEnvPool 屬性) (torchrl.envs.SerialEnv 屬性) (torchrl.envs.ThreadingAsyncEnvPool 屬性) (torchrl.envs.TicTacToeEnv 屬性) RewardClipping (類,位於 torchrl.envs.transforms) RewardClippingConfig (類,位於 torchrl.trainers.algorithms.configs.transforms) RewardData (類,位於 torchrl.data) RewardNormalizer (類,位於 torchrl.trainers) RewardScaling (類,位於 torchrl.envs.transforms) RewardScalingConfig (類,位於 torchrl.trainers.algorithms.configs.transforms) RewardSum (類,位於 torchrl.envs.transforms) RewardSumConfig (類,位於 torchrl.trainers.algorithms.configs.transforms) RMSpropConfig (類,位於 torchrl.trainers.algorithms.configs.utils) RoboHiveEnv() (位於模組 torchrl.envs) RoboHiveEnvConfig (類,位於 torchrl.trainers.algorithms.configs.envs_libs) RobosetExperienceReplay (類,位於 torchrl.data.datasets) rollout (torchrl.collectors.llm.LLMCollector 屬性) (torchrl.collectors.llm.RayLLMCollector 屬性) rollout() (位於模組 torchrl.envs) (torchrl.collectors.SyncDataCollector 方法) (torchrl.envs.AsyncEnvPool 方法) (torchrl.envs.ChessEnv 方法) (torchrl.envs.EnvBase 方法), [1] (torchrl.envs.GymLikeEnv 方法) (torchrl.envs.llm.ChatEnv 方法) (torchrl.envs.llm.DatasetChatEnv 方法) (torchrl.envs.llm.GSM8KEnv 方法) (torchrl.envs.llm.IFEvalEnv 方法) (torchrl.envs.llm.LLMEnv 方法) (torchrl.envs.llm.LLMHashingEnv 方法) (torchrl.envs.llm.MLGymWrapper 方法) (torchrl.envs.LLMHashingEnv 方法) (torchrl.envs.ParallelEnv 方法) (torchrl.envs.PendulumEnv 方法) (torchrl.envs.ProcessorAsyncEnvPool 方法) (torchrl.envs.SerialEnv 方法) (torchrl.envs.ThreadingAsyncEnvPool 方法) (torchrl.envs.TicTacToeEnv 方法) rollout_from_path() (torchrl.data.Tree 方法) RolloutFromModel (類,位於 torchrl.data) RoundRobinWriter (類,位於 torchrl.data.replay_buffers) RoundRobinWriterConfig (類,位於 torchrl.trainers.algorithms.configs.data) RPCDataCollector (類,位於 torchrl.collectors.distributed) RPCWeightUpdater (類,位於 torchrl.collectors.distributed) RpropConfig (類,位於 torchrl.trainers.algorithms.configs.utils) rsample() (torchrl.modules.Delta 方法) (torchrl.modules.LLMMaskedCategorical 方法) (torchrl.modules.MaskedOneHotCategorical 方法) (torchrl.modules.OneHotCategorical 方法) RSSMPosterior (類,位於 torchrl.modules) RSSMPrior (類,位於 torchrl.modules) S SACLoss (類,位於 torchrl.objectives) SafeModule (類,位於 torchrl.modules.tensordict_module) SafeProbabilisticModule (類,位於 torchrl.modules.tensordict_module) SafeProbabilisticTensorDictSequential (類,位於 torchrl.modules.tensordict_module) SafeSequential (類,位於 torchrl.modules.tensordict_module) sample() (torchrl.data.Binary 方法) (torchrl.data.BinaryDiscreteTensorSpec 方法) (torchrl.data.Bounded 方法) (torchrl.data.BoundedTensorSpec 方法) (torchrl.data.Categorical 方法) (torchrl.data.Composite 方法) (torchrl.data.CompositeSpec 方法) (torchrl.data.datasets.AtariDQNExperienceReplay 方法) (torchrl.data.datasets.BaseDatasetExperienceReplay 方法) (torchrl.data.datasets.D4RLExperienceReplay 方法) (torchrl.data.datasets.GenDGRLExperienceReplay 方法) (torchrl.data.datasets.MinariExperienceReplay 方法) (torchrl.data.datasets.OpenMLExperienceReplay 方法) (torchrl.data.datasets.OpenXExperienceReplay 方法) (torchrl.data.datasets.RobosetExperienceReplay 方法) (torchrl.data.datasets.VD4RLExperienceReplay 方法) (torchrl.data.DiscreteTensorSpec 方法) (torchrl.data.LazyStackedCompositeSpec 方法) (torchrl.data.LazyStackedTensorSpec 方法) (torchrl.data.MultiCategorical 方法) (torchrl.data.MultiDiscreteTensorSpec 方法) (torchrl.data.MultiOneHot 方法) (torchrl.data.MultiOneHotDiscreteTensorSpec 方法) (torchrl.data.NonTensor 方法) (torchrl.data.NonTensorSpec 方法) (torchrl.data.OneHot 方法) (torchrl.data.OneHotDiscreteTensorSpec 方法) (torchrl.data.PrioritizedReplayBuffer 方法) (torchrl.data.RayReplayBuffer 方法) (torchrl.data.RemoteTensorDictReplayBuffer 方法) (torchrl.data.replay_buffers.ReplayBufferEnsemble 方法) (torchrl.data.ReplayBuffer 方法) (torchrl.data.Stacked 方法) (torchrl.data.StackedComposite 方法) (torchrl.data.TensorDictPrioritizedReplayBuffer 方法) (torchrl.data.TensorDictReplayBuffer 方法) (torchrl.data.TensorSpec 方法) (torchrl.data.Unbounded 方法) (torchrl.data.UnboundedContinuous 方法) (torchrl.data.UnboundedContinuousTensorSpec 方法) (torchrl.data.UnboundedDiscrete 方法) (torchrl.data.UnboundedDiscreteTensorSpec 方法) (torchrl.modules.Delta 方法) (torchrl.modules.LLMMaskedCategorical 方法) (torchrl.modules.MaskedCategorical 方法) (torchrl.modules.MaskedOneHotCategorical 方法) (torchrl.modules.OneHotCategorical 方法) Sampler (類,位於 torchrl.data.replay_buffers) sampler (torchrl.data.datasets.AtariDQNExperienceReplay 屬性) (torchrl.data.datasets.BaseDatasetExperienceReplay 屬性) (torchrl.data.datasets.D4RLExperienceReplay 屬性) (torchrl.data.datasets.GenDGRLExperienceReplay 屬性) (torchrl.data.datasets.MinariExperienceReplay 屬性) (torchrl.data.datasets.OpenMLExperienceReplay 屬性) (torchrl.data.datasets.OpenXExperienceReplay 屬性) (torchrl.data.datasets.RobosetExperienceReplay 屬性) (torchrl.data.datasets.VD4RLExperienceReplay 屬性) (torchrl.data.PrioritizedReplayBuffer 屬性) (torchrl.data.RayReplayBuffer 屬性) (torchrl.data.RemoteTensorDictReplayBuffer 屬性) (torchrl.data.replay_buffers.ReplayBufferEnsemble 屬性) (torchrl.data.ReplayBuffer 屬性) (torchrl.data.TensorDictPrioritizedReplayBuffer 屬性) (torchrl.data.TensorDictReplayBuffer 屬性) SamplerEnsemble (類,位於 torchrl.data.replay_buffers) SamplerWithoutReplacement (類,位於 torchrl.data.replay_buffers) SamplerWithoutReplacementConfig (類,位於 torchrl.trainers.algorithms.configs.data) save() (torchrl.data.datasets.AtariDQNExperienceReplay 方法) (torchrl.data.datasets.BaseDatasetExperienceReplay 方法) (torchrl.data.datasets.D4RLExperienceReplay 方法) (torchrl.data.datasets.GenDGRLExperienceReplay 方法) (torchrl.data.datasets.MinariExperienceReplay 方法) (torchrl.data.datasets.OpenMLExperienceReplay 方法) (torchrl.data.datasets.OpenXExperienceReplay 方法) (torchrl.data.datasets.RobosetExperienceReplay 方法) (torchrl.data.datasets.VD4RLExperienceReplay 方法) (torchrl.data.llm.ContentBase 方法) (torchrl.data.llm.History 方法) (torchrl.data.PairwiseDataset 方法) (torchrl.data.PrioritizedReplayBuffer 方法) (torchrl.data.PromptData 方法) (torchrl.data.RayReplayBuffer 方法) (torchrl.data.RemoteTensorDictReplayBuffer 方法) (torchrl.data.replay_buffers.CompressedListStorage 方法) (torchrl.data.replay_buffers.LazyMemmapStorage 方法) (torchrl.data.replay_buffers.LazyStackStorage 方法) (torchrl.data.replay_buffers.LazyTensorStorage 方法) (torchrl.data.replay_buffers.ListStorage 方法) (torchrl.data.replay_buffers.ReplayBufferEnsemble 方法) (torchrl.data.replay_buffers.Storage 方法) (torchrl.data.replay_buffers.StorageEnsemble 方法) (torchrl.data.replay_buffers.TensorStorage 方法) (torchrl.data.ReplayBuffer 方法) (torchrl.data.RewardData 方法) (torchrl.data.TensorDictPrioritizedReplayBuffer 方法) (torchrl.data.TensorDictReplayBuffer 方法) (torchrl.data.Tree 方法) (torchrl.envs.llm.IFEvalScoreData 方法) (torchrl.modules.llm.ChatHistory 方法) (torchrl.modules.llm.LogProbs 方法) (torchrl.modules.llm.Masks 方法) (torchrl.modules.llm.Text 方法) (torchrl.modules.llm.Tokens 方法) (torchrl.objectives.llm.GRPOLossOutput 方法) (torchrl.objectives.llm.SFTLossOutput 方法) scale (torchrl.envs.transforms.VecNorm 屬性) (torchrl.envs.transforms.VecNormV2 屬性) scale() (torchrl.envs.transforms.VecNormV2 方法) select_out_keys() (torchrl.data.DensifyReward 方法) (torchrl.data.QueryModule 方法) (torchrl.data.TensorDictMap 方法) (torchrl.modules.llm.LLMWrapperBase 方法) (torchrl.modules.llm.TransformersWrapper 方法) (torchrl.modules.llm.vLLMWrapper 方法) (torchrl.objectives.llm.GRPOLoss 方法) (torchrl.objectives.llm.SFTLoss 方法) selected_actions (torchrl.data.Tree 屬性) SelectKeys (類,位於 torchrl.trainers) SelectTransform (類,位於 torchrl.envs.transforms) SelectTransformConfig (類,位於 torchrl.trainers.algorithms.configs.transforms) separates() (torchrl.data.Composite 方法) (torchrl.data.CompositeSpec 方法) (torchrl.data.LazyStackedCompositeSpec 方法) (torchrl.data.StackedComposite 方法) SerialEnv (類,位於 torchrl.envs) set() (torchrl.data.Composite 方法) (torchrl.data.CompositeSpec 方法) (torchrl.data.LazyStackedCompositeSpec 方法) (torchrl.data.llm.ContentBase 方法) (torchrl.data.llm.History 方法) (torchrl.data.PairwiseDataset 方法) (torchrl.data.PromptData 方法) (torchrl.data.RewardData 方法) (torchrl.data.StackedComposite 方法) (torchrl.data.Tree 方法) (torchrl.envs.llm.IFEvalScoreData 方法) (torchrl.modules.llm.ChatHistory 方法) (torchrl.modules.llm.LogProbs 方法) (torchrl.modules.llm.Masks 方法) (torchrl.modules.llm.Text 方法) (torchrl.modules.llm.Tokens 方法) (torchrl.objectives.llm.GRPOLossOutput 方法) (torchrl.objectives.llm.SFTLossOutput 方法) set_attr() (torchrl.envs.llm.transforms.DataLoadingPrimer 方法) set_auto_unwrap_transformed_env (類,位於 torchrl) set_container() (torchrl.envs.llm.transforms.RayDataLoadingPrimer 方法) (torchrl.envs.transforms.Transform 方法) set_exploration_type() (位於模組 torchrl.envs) set_extra_state() (torchrl.data.BinaryToDecimal 方法) (torchrl.data.DensifyReward 方法) (torchrl.data.HashToInt 方法) (torchrl.data.llm.TopKRewardSelector 方法) (torchrl.data.MultiStep 方法) (torchrl.data.QueryModule 方法) (torchrl.data.RandomProjectionHash 方法) (torchrl.data.SipHash 方法) (torchrl.data.TensorDictMap 方法) (torchrl.envs.AsyncEnvPool 方法) (torchrl.envs.ChessEnv 方法) (torchrl.envs.EnvBase 方法) (torchrl.envs.GymLikeEnv 方法) (torchrl.envs.llm.ChatEnv 方法) (torchrl.envs.llm.DatasetChatEnv 方法) (torchrl.envs.llm.GSM8KEnv 方法) (torchrl.envs.llm.GSM8KPrepareQuestion 方法) (torchrl.envs.llm.GSM8KRewardParser 方法) (torchrl.envs.llm.IFEvalEnv 方法) (torchrl.envs.llm.IfEvalScorer 方法) (torchrl.envs.llm.LLMEnv 方法) (torchrl.envs.llm.LLMHashingEnv 方法) (torchrl.envs.llm.MLGymWrapper 方法) (torchrl.envs.llm.transforms.AddThinkingPrompt 方法) (torchrl.envs.llm.transforms.BrowserTransform 方法) (torchrl.envs.llm.transforms.DataLoadingPrimer 方法) (torchrl.envs.llm.transforms.KLComputation 方法) (torchrl.envs.llm.transforms.KLRewardTransform 方法) (torchrl.envs.llm.transforms.MCPToolTransform 方法) (torchrl.envs.llm.transforms.PolicyVersion 方法) (torchrl.envs.llm.transforms.PythonInterpreter 方法) (torchrl.envs.llm.transforms.RayDataLoadingPrimer 方法) (torchrl.envs.llm.transforms.RetrieveKL 方法) (torchrl.envs.llm.transforms.RetrieveLogProb 方法) (torchrl.envs.llm.transforms.TemplateTransform 方法) (torchrl.envs.llm.transforms.Tokenizer 方法) (torchrl.envs.LLMHashingEnv 方法) (torchrl.envs.ParallelEnv 方法) (torchrl.envs.PendulumEnv 方法) (torchrl.envs.ProcessorAsyncEnvPool 方法) (torchrl.envs.SerialEnv 方法) (torchrl.envs.ThreadingAsyncEnvPool 方法) (torchrl.envs.TicTacToeEnv 方法) (torchrl.envs.transforms.rb_transforms.MultiStepTransform 方法) (torchrl.envs.transforms.VecNorm 方法) (torchrl.envs.transforms.VecNormV2 方法), [1] (torchrl.modules.llm.LLMWrapperBase 方法) (torchrl.modules.llm.TransformersWrapper 方法) (torchrl.modules.llm.vLLMWrapper 方法) (torchrl.objectives.llm.GRPOLoss 方法) (torchrl.objectives.llm.MCAdvantage 方法) (torchrl.objectives.llm.SFTLoss 方法) set_gym_backend() (位於模組 torchrl.envs) set_info_dict_reader() (torchrl.envs.GymLikeEnv 方法) (torchrl.envs.llm.MLGymWrapper 方法) set_keys() (torchrl.objectives.CrossQLoss 方法) (torchrl.objectives.llm.GRPOLoss 方法) (torchrl.objectives.llm.SFTLoss 方法) (torchrl.objectives.LossModule 方法) (torchrl.objectives.value.ValueEstimatorBase 方法) set_missing_tolerance() (torchrl.envs.llm.DatasetChatEnv 方法) (torchrl.envs.llm.GSM8KEnv 方法) (torchrl.envs.llm.IFEvalEnv 方法) (torchrl.envs.llm.transforms.RayDataLoadingPrimer 方法) (torchrl.envs.transforms.TransformedEnv 方法) set_provisional_n() (torchrl.data.Binary 方法) (torchrl.data.BinaryDiscreteTensorSpec 方法) (torchrl.data.Categorical 方法) (torchrl.data.DiscreteTensorSpec 方法) (torchrl.data.MultiCategorical 方法) (torchrl.data.MultiDiscreteTensorSpec 方法) set_recurrent_mode (類,位於 torchrl.modules) set_recurrent_mode() (torchrl.modules.GRUModule 方法) (torchrl.modules.LSTMModule 方法) set_sampler() (torchrl.data.datasets.AtariDQNExperienceReplay 方法) (torchrl.data.datasets.BaseDatasetExperienceReplay 方法) (torchrl.data.datasets.D4RLExperienceReplay 方法) (torchrl.data.datasets.GenDGRLExperienceReplay 方法) (torchrl.data.datasets.MinariExperienceReplay 方法) (torchrl.data.datasets.OpenMLExperienceReplay 方法) (torchrl.data.datasets.OpenXExperienceReplay 方法) (torchrl.data.datasets.RobosetExperienceReplay 方法) (torchrl.data.datasets.VD4RLExperienceReplay 方法) (torchrl.data.PrioritizedReplayBuffer 方法) (torchrl.data.RayReplayBuffer 方法) (torchrl.data.RemoteTensorDictReplayBuffer 方法) (torchrl.data.replay_buffers.ReplayBufferEnsemble 方法) (torchrl.data.ReplayBuffer 方法) (torchrl.data.TensorDictPrioritizedReplayBuffer 方法) (torchrl.data.TensorDictReplayBuffer 方法) set_seed() (位於模組 torchrl.envs) (torchrl.collectors.aSyncDataCollector 方法) (torchrl.collectors.distributed.RayCollector 方法) (torchrl.collectors.llm.LLMCollector 方法) (torchrl.collectors.llm.RayLLMCollector 方法) (torchrl.collectors.MultiaSyncDataCollector 方法) (torchrl.collectors.MultiSyncDataCollector 方法) (torchrl.collectors.SyncDataCollector 方法) (torchrl.envs.AsyncEnvPool 方法) (torchrl.envs.ChessEnv 方法) (torchrl.envs.EnvBase 方法), [1] (torchrl.envs.GymLikeEnv 方法) (torchrl.envs.llm.ChatEnv 方法) (torchrl.envs.llm.DatasetChatEnv 方法) (torchrl.envs.llm.GSM8KEnv 方法) (torchrl.envs.llm.IFEvalEnv 方法) (torchrl.envs.llm.LLMEnv 方法) (torchrl.envs.llm.LLMHashingEnv 方法) (torchrl.envs.llm.MLGymWrapper 方法) (torchrl.envs.LLMHashingEnv 方法) (torchrl.envs.ParallelEnv 方法) (torchrl.envs.PendulumEnv 方法) (torchrl.envs.ProcessorAsyncEnvPool 方法) (torchrl.envs.SerialEnv 方法) (torchrl.envs.ThreadingAsyncEnvPool 方法) (torchrl.envs.TicTacToeEnv 方法) (torchrl.envs.transforms.TransformedEnv 方法) set_spec_lock_() (torchrl.envs.AsyncEnvPool 方法) (torchrl.envs.ChessEnv 方法) (torchrl.envs.EnvBase 方法) (torchrl.envs.GymLikeEnv 方法) (torchrl.envs.llm.ChatEnv 方法) (torchrl.envs.llm.DatasetChatEnv 方法) (torchrl.envs.llm.GSM8KEnv 方法) (torchrl.envs.llm.IFEvalEnv 方法) (torchrl.envs.llm.LLMEnv 方法) (torchrl.envs.llm.LLMHashingEnv 方法) (torchrl.envs.llm.MLGymWrapper 方法) (torchrl.envs.LLMHashingEnv 方法) (torchrl.envs.ParallelEnv 方法) (torchrl.envs.PendulumEnv 方法) (torchrl.envs.ProcessorAsyncEnvPool 方法) (torchrl.envs.SerialEnv 方法) (torchrl.envs.ThreadingAsyncEnvPool 方法) (torchrl.envs.TicTacToeEnv 方法) set_storage() (torchrl.data.datasets.AtariDQNExperienceReplay 方法) (torchrl.data.datasets.BaseDatasetExperienceReplay 方法) (torchrl.data.datasets.D4RLExperienceReplay 方法) (torchrl.data.datasets.GenDGRLExperienceReplay 方法) (torchrl.data.datasets.MinariExperienceReplay 方法) (torchrl.data.datasets.OpenMLExperienceReplay 方法) (torchrl.data.datasets.OpenXExperienceReplay 方法) (torchrl.data.datasets.RobosetExperienceReplay 方法) (torchrl.data.datasets.VD4RLExperienceReplay 方法) (torchrl.data.PrioritizedReplayBuffer 方法) (torchrl.data.RayReplayBuffer 方法) (torchrl.data.RemoteTensorDictReplayBuffer 方法) (torchrl.data.replay_buffers.ReplayBufferEnsemble 方法) (torchrl.data.ReplayBuffer 方法) (torchrl.data.TensorDictPrioritizedReplayBuffer 方法) (torchrl.data.TensorDictReplayBuffer 方法) set_submodule() (torchrl.data.BinaryToDecimal 方法) (torchrl.data.DensifyReward 方法) (torchrl.data.HashToInt 方法) (torchrl.data.llm.TopKRewardSelector 方法) (torchrl.data.MultiStep 方法) (torchrl.data.QueryModule 方法) (torchrl.data.RandomProjectionHash 方法) (torchrl.data.SipHash 方法) (torchrl.data.TensorDictMap 方法) (torchrl.envs.AsyncEnvPool 方法) (torchrl.envs.ChessEnv 方法) (torchrl.envs.EnvBase 方法) (torchrl.envs.GymLikeEnv 方法) (torchrl.envs.llm.ChatEnv 方法) (torchrl.envs.llm.DatasetChatEnv 方法) (torchrl.envs.llm.GSM8KEnv 方法) (torchrl.envs.llm.GSM8KPrepareQuestion 方法) (torchrl.envs.llm.GSM8KRewardParser 方法) (torchrl.envs.llm.IFEvalEnv 方法) (torchrl.envs.llm.IfEvalScorer 方法) (torchrl.envs.llm.LLMEnv 方法) (torchrl.envs.llm.LLMHashingEnv 方法) (torchrl.envs.llm.MLGymWrapper 方法) (torchrl.envs.llm.transforms.AddThinkingPrompt 方法) (torchrl.envs.llm.transforms.BrowserTransform 方法) (torchrl.envs.llm.transforms.DataLoadingPrimer 方法) (torchrl.envs.llm.transforms.KLComputation 方法) (torchrl.envs.llm.transforms.KLRewardTransform 方法) (torchrl.envs.llm.transforms.MCPToolTransform 方法) (torchrl.envs.llm.transforms.PolicyVersion 方法) (torchrl.envs.llm.transforms.PythonInterpreter 方法) (torchrl.envs.llm.transforms.RayDataLoadingPrimer 方法) (torchrl.envs.llm.transforms.RetrieveKL 方法) (torchrl.envs.llm.transforms.RetrieveLogProb 方法) (torchrl.envs.llm.transforms.TemplateTransform 方法) (torchrl.envs.llm.transforms.Tokenizer 方法) (torchrl.envs.LLMHashingEnv 方法) (torchrl.envs.ParallelEnv 方法) (torchrl.envs.PendulumEnv 方法) (torchrl.envs.ProcessorAsyncEnvPool 方法) (torchrl.envs.SerialEnv 方法) (torchrl.envs.ThreadingAsyncEnvPool 方法) (torchrl.envs.TicTacToeEnv 方法) (torchrl.envs.transforms.rb_transforms.MultiStepTransform 方法) (torchrl.modules.llm.LLMWrapperBase 方法) (torchrl.modules.llm.TransformersWrapper 方法) (torchrl.modules.llm.vLLMWrapper 方法) (torchrl.objectives.llm.GRPOLoss 方法) (torchrl.objectives.llm.MCAdvantage 方法) (torchrl.objectives.llm.SFTLoss 方法) set_tensor_keys() (torchrl.modules.tensordict_module.DecisionTransformerInferenceWrapper 方法) set_tokenizer() (torchrl.modules.llm.vLLMWrapper 方法) set_writer() (torchrl.data.datasets.AtariDQNExperienceReplay 方法) (torchrl.data.datasets.BaseDatasetExperienceReplay 方法) (torchrl.data.datasets.D4RLExperienceReplay 方法) (torchrl.data.datasets.GenDGRLExperienceReplay 方法) (torchrl.data.datasets.MinariExperienceReplay 方法) (torchrl.data.datasets.OpenMLExperienceReplay 方法) (torchrl.data.datasets.OpenXExperienceReplay 方法) (torchrl.data.datasets.RobosetExperienceReplay 方法) (torchrl.data.datasets.VD4RLExperienceReplay 方法) (torchrl.data.PrioritizedReplayBuffer 方法) (torchrl.data.RayReplayBuffer 方法) (torchrl.data.RemoteTensorDictReplayBuffer 方法) (torchrl.data.replay_buffers.ReplayBufferEnsemble 方法) (torchrl.data.ReplayBuffer 方法) (torchrl.data.TensorDictPrioritizedReplayBuffer 方法) (torchrl.data.TensorDictReplayBuffer 方法) SFTLoss (類,位於 torchrl.objectives.llm) SFTLossOutput (類,位於 torchrl.objectives.llm) SGDConfig (類,位於 torchrl.trainers.algorithms.configs.utils) shape (torchrl.envs.AsyncEnvPool 屬性) (torchrl.envs.ChessEnv 屬性) (torchrl.envs.EnvBase 屬性) (torchrl.envs.GymLikeEnv 屬性) (torchrl.envs.llm.ChatEnv 屬性) (torchrl.envs.llm.DatasetChatEnv 屬性) (torchrl.envs.llm.GSM8KEnv 屬性) (torchrl.envs.llm.IFEvalEnv 屬性) (torchrl.envs.llm.LLMEnv 屬性) (torchrl.envs.llm.LLMHashingEnv 屬性) (torchrl.envs.llm.MLGymWrapper 屬性) (torchrl.envs.LLMHashingEnv 屬性) (torchrl.envs.ParallelEnv 屬性) (torchrl.envs.PendulumEnv 屬性) (torchrl.envs.ProcessorAsyncEnvPool 屬性) (torchrl.envs.SerialEnv 屬性) (torchrl.envs.ThreadingAsyncEnvPool 屬性) (torchrl.envs.TicTacToeEnv 屬性) share_memory() (torchrl.data.BinaryToDecimal 方法) (torchrl.data.DensifyReward 方法) (torchrl.data.HashToInt 方法) (torchrl.data.llm.TopKRewardSelector 方法) (torchrl.data.MultiStep 方法) (torchrl.data.QueryModule 方法) (torchrl.data.RandomProjectionHash 方法) (torchrl.data.SipHash 方法) (torchrl.data.TensorDictMap 方法) (torchrl.envs.AsyncEnvPool 方法) (torchrl.envs.ChessEnv 方法) (torchrl.envs.EnvBase 方法) (torchrl.envs.GymLikeEnv 方法) (torchrl.envs.llm.ChatEnv 方法) (torchrl.envs.llm.DatasetChatEnv 方法) (torchrl.envs.llm.GSM8KEnv 方法) (torchrl.envs.llm.GSM8KPrepareQuestion 方法) (torchrl.envs.llm.GSM8KRewardParser 方法) (torchrl.envs.llm.IFEvalEnv 方法) (torchrl.envs.llm.IfEvalScorer 方法) (torchrl.envs.llm.LLMEnv 方法) (torchrl.envs.llm.LLMHashingEnv 方法) (torchrl.envs.llm.MLGymWrapper 方法) (torchrl.envs.llm.transforms.AddThinkingPrompt 方法) (torchrl.envs.llm.transforms.BrowserTransform 方法) (torchrl.envs.llm.transforms.DataLoadingPrimer 方法) (torchrl.envs.llm.transforms.KLComputation 方法) (torchrl.envs.llm.transforms.KLRewardTransform 方法) (torchrl.envs.llm.transforms.MCPToolTransform 方法) (torchrl.envs.llm.transforms.PolicyVersion 方法) (torchrl.envs.llm.transforms.PythonInterpreter 方法) (torchrl.envs.llm.transforms.RayDataLoadingPrimer 方法) (torchrl.envs.llm.transforms.RetrieveKL 方法) (torchrl.envs.llm.transforms.RetrieveLogProb 方法) (torchrl.envs.llm.transforms.TemplateTransform 方法) (torchrl.envs.llm.transforms.Tokenizer 方法) (torchrl.envs.LLMHashingEnv 方法) (torchrl.envs.ParallelEnv 方法) (torchrl.envs.PendulumEnv 方法) (torchrl.envs.ProcessorAsyncEnvPool 方法) (torchrl.envs.SerialEnv 方法) (torchrl.envs.ThreadingAsyncEnvPool 方法) (torchrl.envs.TicTacToeEnv 方法) (torchrl.envs.transforms.rb_transforms.MultiStepTransform 方法) (torchrl.modules.llm.LLMWrapperBase 方法) (torchrl.modules.llm.TransformersWrapper 方法) (torchrl.modules.llm.vLLMWrapper 方法) (torchrl.objectives.llm.GRPOLoss 方法) (torchrl.objectives.llm.MCAdvantage 方法) (torchrl.objectives.llm.SFTLoss 方法) shutdown() (torchrl.collectors.aSyncDataCollector 方法) (torchrl.collectors.distributed.RayCollector 方法) (torchrl.collectors.llm.LLMCollector 方法) (torchrl.collectors.llm.RayLLMCollector 方法) (torchrl.collectors.MultiaSyncDataCollector 方法) (torchrl.collectors.MultiSyncDataCollector 方法) (torchrl.collectors.SyncDataCollector 方法) (torchrl.envs.ProcessorAsyncEnvPool 方法) (torchrl.envs.ThreadingAsyncEnvPool 方法) (torchrl.modules.llm.AsyncVLLM 方法) SignTransform (類,位於 torchrl.envs.transforms) SignTransformConfig (類,位於 torchrl.trainers.algorithms.configs.transforms) SipHash (類,位於 torchrl.data) SliceSampler (類,位於 torchrl.data.replay_buffers) SliceSamplerConfig (類,位於 torchrl.trainers.algorithms.configs.data) SliceSamplerWithoutReplacement (類,位於 torchrl.data.replay_buffers) SliceSamplerWithoutReplacementConfig (類,位於 torchrl.trainers.algorithms.configs.data) SMACv2Env() (位於模組 torchrl.envs) SMACv2EnvConfig (類,位於 torchrl.trainers.algorithms.configs.envs_libs) SMACv2Wrapper() (位於模組 torchrl.envs) SoftUpdate (類,位於 torchrl.objectives) SparseAdamConfig (類,位於 torchrl.trainers.algorithms.configs.utils) specs (torchrl.envs.AsyncEnvPool 屬性) (torchrl.envs.ChessEnv 屬性) (torchrl.envs.EnvBase 屬性) (torchrl.envs.GymLikeEnv 屬性) (torchrl.envs.llm.ChatEnv 屬性) (torchrl.envs.llm.DatasetChatEnv 屬性) (torchrl.envs.llm.GSM8KEnv 屬性) (torchrl.envs.llm.IFEvalEnv 屬性) (torchrl.envs.llm.LLMEnv 屬性) (torchrl.envs.llm.LLMHashingEnv 屬性) (torchrl.envs.llm.MLGymWrapper 屬性) (torchrl.envs.LLMHashingEnv 屬性) (torchrl.envs.ParallelEnv 屬性) (torchrl.envs.PendulumEnv 屬性) (torchrl.envs.ProcessorAsyncEnvPool 屬性) (torchrl.envs.SerialEnv 屬性) (torchrl.envs.ThreadingAsyncEnvPool 屬性) (torchrl.envs.TicTacToeEnv 屬性) split_trajectories() (位於模組 torchrl.collectors.utils) squeeze() (torchrl.data.Binary 方法) (torchrl.data.BinaryDiscreteTensorSpec 方法) (torchrl.data.Bounded 方法) (torchrl.data.BoundedTensorSpec 方法) (torchrl.data.Categorical 方法) (torchrl.data.Composite 方法) (torchrl.data.CompositeSpec 方法) (torchrl.data.DiscreteTensorSpec 方法) (torchrl.data.LazyStackedCompositeSpec 方法) (torchrl.data.LazyStackedTensorSpec 方法) (torchrl.data.MultiCategorical 方法) (torchrl.data.MultiDiscreteTensorSpec 方法) (torchrl.data.MultiOneHot 方法) (torchrl.data.MultiOneHotDiscreteTensorSpec 方法) (torchrl.data.NonTensor 方法) (torchrl.data.NonTensorSpec 方法) (torchrl.data.OneHot 方法) (torchrl.data.OneHotDiscreteTensorSpec 方法) (torchrl.data.Stacked 方法) (torchrl.data.StackedComposite 方法) (torchrl.data.TensorSpec 方法) (torchrl.data.Unbounded 方法) (torchrl.data.UnboundedContinuous 方法) (torchrl.data.UnboundedContinuousTensorSpec 方法) (torchrl.data.UnboundedDiscrete 方法) (torchrl.data.UnboundedDiscreteTensorSpec 方法) Squeeze2dLayer (類,位於 torchrl.modules) SqueezeLayer (類,位於 torchrl.modules) SqueezeTransform (類,位於 torchrl.envs.transforms) SqueezeTransformConfig (類,位於 torchrl.trainers.algorithms.configs.transforms) Stack (類,位於 torchrl.envs.transforms) stack() (torchrl.data.llm.ContentBase 方法) (torchrl.data.llm.History 方法) (torchrl.data.Tree 方法) (torchrl.envs.llm.IFEvalScoreData 方法) (torchrl.modules.llm.ChatHistory 方法) (torchrl.modules.llm.LogProbs 方法) (torchrl.modules.llm.Masks 方法) (torchrl.modules.llm.Text 方法) (torchrl.modules.llm.Tokens 方法) (torchrl.objectives.llm.GRPOLossOutput 方法) (torchrl.objectives.llm.SFTLossOutput 方法) stack_method (torchrl.envs.llm.transforms.RayDataLoadingPrimer 屬性) StackConfig (類,位於 torchrl.trainers.algorithms.configs.transforms) Stacked (類,位於 torchrl.data) StackedComposite (類,位於 torchrl.data) standard_normal (torchrl.envs.transforms.VecNorm 屬性) (torchrl.envs.transforms.VecNormV2 屬性) standard_normal() (torchrl.envs.transforms.VecNormV2 方法) start() (torchrl.collectors.aSyncDataCollector 方法) (torchrl.collectors.DataCollectorBase 方法) (torchrl.collectors.distributed.DistributedDataCollector 方法) (torchrl.collectors.distributed.DistributedSyncDataCollector 方法) (torchrl.collectors.distributed.RayCollector 方法) (torchrl.collectors.distributed.RPCDataCollector 方法) (torchrl.collectors.llm.LLMCollector 方法) (torchrl.collectors.llm.RayLLMCollector 方法) (torchrl.collectors.MultiaSyncDataCollector 方法) (torchrl.collectors.MultiSyncDataCollector 方法) (torchrl.collectors.SyncDataCollector 方法) state_dict() (torchrl.collectors.aSyncDataCollector 方法) (torchrl.collectors.distributed.RayCollector 方法) (torchrl.collectors.llm.LLMCollector 方法) (torchrl.collectors.llm.RayLLMCollector 方法) (torchrl.collectors.MultiaSyncDataCollector 方法) (torchrl.collectors.MultiSyncDataCollector 方法) (torchrl.collectors.SyncDataCollector 方法) (torchrl.data.BinaryToDecimal 方法) (torchrl.data.DensifyReward 方法) (torchrl.data.HashToInt 方法) (torchrl.data.llm.ContentBase 方法) (torchrl.data.llm.History 方法) (torchrl.data.llm.TopKRewardSelector 方法) (torchrl.data.MultiStep 方法) (torchrl.data.PairwiseDataset 方法) (torchrl.data.PromptData 方法) (torchrl.data.QueryModule 方法) (torchrl.data.RandomProjectionHash 方法) (torchrl.data.replay_buffers.CompressedListStorage 方法) (torchrl.data.RewardData 方法) (torchrl.data.SipHash 方法) (torchrl.data.TensorDictMap 方法) (torchrl.data.Tree 方法) (torchrl.envs.AsyncEnvPool 方法) (torchrl.envs.ChessEnv 方法) (torchrl.envs.EnvBase 方法) (torchrl.envs.GymLikeEnv 方法) (torchrl.envs.llm.ChatEnv 方法) (torchrl.envs.llm.DatasetChatEnv 方法) (torchrl.envs.llm.GSM8KEnv 方法) (torchrl.envs.llm.GSM8KPrepareQuestion 方法) (torchrl.envs.llm.GSM8KRewardParser 方法) (torchrl.envs.llm.IFEvalEnv 方法) (torchrl.envs.llm.IFEvalScoreData 方法) (torchrl.envs.llm.IfEvalScorer 方法) (torchrl.envs.llm.LLMEnv 方法) (torchrl.envs.llm.LLMHashingEnv 方法) (torchrl.envs.llm.MLGymWrapper 方法) (torchrl.envs.llm.transforms.AddThinkingPrompt 方法) (torchrl.envs.llm.transforms.BrowserTransform 方法) (torchrl.envs.llm.transforms.DataLoadingPrimer 方法) (torchrl.envs.llm.transforms.KLComputation 方法) (torchrl.envs.llm.transforms.KLRewardTransform 方法) (torchrl.envs.llm.transforms.MCPToolTransform 方法) (torchrl.envs.llm.transforms.PolicyVersion 方法) (torchrl.envs.llm.transforms.PythonInterpreter 方法) (torchrl.envs.llm.transforms.RayDataLoadingPrimer 方法) (torchrl.envs.llm.transforms.RetrieveKL 方法) (torchrl.envs.llm.transforms.RetrieveLogProb 方法) (torchrl.envs.llm.transforms.TemplateTransform 方法) (torchrl.envs.llm.transforms.Tokenizer 方法) (torchrl.envs.LLMHashingEnv 方法) (torchrl.envs.ParallelEnv 方法) (torchrl.envs.PendulumEnv 方法) (torchrl.envs.ProcessorAsyncEnvPool 方法) (torchrl.envs.SerialEnv 方法) (torchrl.envs.ThreadingAsyncEnvPool 方法) (torchrl.envs.TicTacToeEnv 方法) (torchrl.envs.transforms.Hash 方法) (torchrl.envs.transforms.rb_transforms.MultiStepTransform 方法) (torchrl.envs.transforms.TrajCounter 方法) (torchrl.envs.transforms.TransformedEnv 方法) (torchrl.modules.llm.ChatHistory 方法) (torchrl.modules.llm.LLMWrapperBase 方法) (torchrl.modules.llm.LogProbs 方法) (torchrl.modules.llm.Masks 方法) (torchrl.modules.llm.Text 方法) (torchrl.modules.llm.Tokens 方法) (torchrl.modules.llm.TransformersWrapper 方法) (torchrl.modules.llm.vLLMWrapper 方法) (torchrl.objectives.CrossQLoss 方法) (torchrl.objectives.llm.GRPOLoss 方法) (torchrl.objectives.llm.GRPOLossOutput 方法) (torchrl.objectives.llm.MCAdvantage 方法) (torchrl.objectives.llm.SFTLoss 方法) (torchrl.objectives.llm.SFTLossOutput 方法) (torchrl.objectives.SACLoss 方法) state_keys (torchrl.envs.AsyncEnvPool 屬性) (torchrl.envs.ChessEnv 屬性) (torchrl.envs.EnvBase 屬性) (torchrl.envs.GymLikeEnv 屬性) (torchrl.envs.llm.ChatEnv 屬性) (torchrl.envs.llm.DatasetChatEnv 屬性) (torchrl.envs.llm.GSM8KEnv 屬性) (torchrl.envs.llm.IFEvalEnv 屬性) (torchrl.envs.llm.LLMEnv 屬性) (torchrl.envs.llm.LLMHashingEnv 屬性) (torchrl.envs.llm.MLGymWrapper 屬性) (torchrl.envs.LLMHashingEnv 屬性) (torchrl.envs.ParallelEnv 屬性) (torchrl.envs.PendulumEnv 屬性) (torchrl.envs.ProcessorAsyncEnvPool 屬性) (torchrl.envs.SerialEnv 屬性) (torchrl.envs.ThreadingAsyncEnvPool 屬性) (torchrl.envs.TicTacToeEnv 屬性) state_spec (torchrl.envs.AsyncEnvPool 屬性) (torchrl.envs.ChessEnv 屬性) (torchrl.envs.EnvBase 屬性) (torchrl.envs.GymLikeEnv 屬性) (torchrl.envs.llm.ChatEnv 屬性) (torchrl.envs.llm.DatasetChatEnv 屬性) (torchrl.envs.llm.GSM8KEnv 屬性) (torchrl.envs.llm.IFEvalEnv 屬性) (torchrl.envs.llm.LLMEnv 屬性) (torchrl.envs.llm.LLMHashingEnv 屬性) (torchrl.envs.llm.MLGymWrapper 屬性) (torchrl.envs.LLMHashingEnv 屬性) (torchrl.envs.ParallelEnv 屬性) (torchrl.envs.PendulumEnv 屬性) (torchrl.envs.ProcessorAsyncEnvPool 屬性) (torchrl.envs.SerialEnv 屬性) (torchrl.envs.ThreadingAsyncEnvPool 屬性) (torchrl.envs.TicTacToeEnv 屬性) state_spec_unbatched (torchrl.envs.AsyncEnvPool 屬性) (torchrl.envs.ChessEnv 屬性) (torchrl.envs.EnvBase 屬性) (torchrl.envs.GymLikeEnv 屬性) (torchrl.envs.llm.ChatEnv 屬性) (torchrl.envs.llm.DatasetChatEnv 屬性) (torchrl.envs.llm.GSM8KEnv 屬性) (torchrl.envs.llm.IFEvalEnv 屬性) (torchrl.envs.llm.LLMEnv 屬性) (torchrl.envs.llm.LLMHashingEnv 屬性) (torchrl.envs.llm.MLGymWrapper 屬性) (torchrl.envs.LLMHashingEnv 屬性) (torchrl.envs.ParallelEnv 屬性) (torchrl.envs.PendulumEnv 屬性) (torchrl.envs.ProcessorAsyncEnvPool 屬性) (torchrl.envs.SerialEnv 屬性) (torchrl.envs.ThreadingAsyncEnvPool 屬性) (torchrl.envs.TicTacToeEnv 屬性) stateless_init_process_group (類,位於 torchrl.modules.llm) stateless_init_process_group_async (類,位於 torchrl.modules.llm) step() (位於模組 torchrl.envs) (torchrl.envs.AsyncEnvPool 方法) (torchrl.envs.ChessEnv 方法) (torchrl.envs.EnvBase 方法), [1] (torchrl.envs.GymLikeEnv 方法) (torchrl.envs.llm.ChatEnv 方法), [1] (torchrl.envs.llm.DatasetChatEnv 方法) (torchrl.envs.llm.GSM8KEnv 方法) (torchrl.envs.llm.IFEvalEnv 方法) (torchrl.envs.llm.LLMEnv 方法) (torchrl.envs.llm.LLMHashingEnv 方法) (torchrl.envs.llm.MLGymWrapper 方法) (torchrl.envs.LLMHashingEnv 方法) (torchrl.envs.ParallelEnv 方法) (torchrl.envs.PendulumEnv 方法) (torchrl.envs.ProcessorAsyncEnvPool 方法) (torchrl.envs.SerialEnv 方法) (torchrl.envs.ThreadingAsyncEnvPool 方法) (torchrl.envs.TicTacToeEnv 方法) (torchrl.modules.AdditiveGaussianModule 方法) (torchrl.modules.EGreedyModule 方法) (torchrl.modules.OrnsteinUhlenbeckProcessModule 方法) step_and_maybe_reset() (torchrl.envs.AsyncEnvPool 方法) (torchrl.envs.ChessEnv 方法) (torchrl.envs.EnvBase 方法) (torchrl.envs.GymLikeEnv 方法) (torchrl.envs.llm.ChatEnv 方法) (torchrl.envs.llm.DatasetChatEnv 方法) (torchrl.envs.llm.GSM8KEnv 方法) (torchrl.envs.llm.IFEvalEnv 方法) (torchrl.envs.llm.LLMEnv 方法) (torchrl.envs.llm.LLMHashingEnv 方法) (torchrl.envs.llm.MLGymWrapper 方法) (torchrl.envs.LLMHashingEnv 方法) (torchrl.envs.ParallelEnv 方法) (torchrl.envs.PendulumEnv 方法) (torchrl.envs.ProcessorAsyncEnvPool 方法) (torchrl.envs.SerialEnv 方法) (torchrl.envs.ThreadingAsyncEnvPool 方法) (torchrl.envs.TicTacToeEnv 方法) step_mdp() (位於模組 torchrl.envs) (torchrl.envs.AsyncEnvPool 方法) (torchrl.envs.ChessEnv 方法) (torchrl.envs.EnvBase 方法) (torchrl.envs.GymLikeEnv 方法) (torchrl.envs.llm.ChatEnv 方法) (torchrl.envs.llm.DatasetChatEnv 方法) (torchrl.envs.llm.GSM8KEnv 方法) (torchrl.envs.llm.IFEvalEnv 方法) (torchrl.envs.llm.LLMEnv 方法) (torchrl.envs.llm.LLMHashingEnv 方法) (torchrl.envs.llm.MLGymWrapper 方法) (torchrl.envs.LLMHashingEnv 方法) (torchrl.envs.ParallelEnv 方法) (torchrl.envs.PendulumEnv 方法) (torchrl.envs.ProcessorAsyncEnvPool 方法) (torchrl.envs.SerialEnv 方法) (torchrl.envs.ThreadingAsyncEnvPool 方法) (torchrl.envs.TicTacToeEnv 方法) StepCounter (類,位於 torchrl.envs.transforms) StepCounterConfig (類,位於 torchrl.trainers.algorithms.configs.transforms) stop_remote_collectors() (torchrl.collectors.distributed.RayCollector 方法) Storage (類,位於 torchrl.data.replay_buffers) storage (torchrl.data.datasets.AtariDQNExperienceReplay 屬性) (torchrl.data.datasets.BaseDatasetExperienceReplay 屬性) (torchrl.data.datasets.D4RLExperienceReplay 屬性) (torchrl.data.datasets.GenDGRLExperienceReplay 屬性) (torchrl.data.datasets.MinariExperienceReplay 屬性) (torchrl.data.datasets.OpenMLExperienceReplay 屬性) (torchrl.data.datasets.OpenXExperienceReplay 屬性) (torchrl.data.datasets.RobosetExperienceReplay 屬性) (torchrl.data.datasets.VD4RLExperienceReplay 屬性) (torchrl.data.PrioritizedReplayBuffer 屬性) (torchrl.data.RayReplayBuffer 屬性) (torchrl.data.RemoteTensorDictReplayBuffer 屬性) (torchrl.data.replay_buffers.ReplayBufferEnsemble 屬性) (torchrl.data.ReplayBuffer 屬性) (torchrl.data.TensorDictPrioritizedReplayBuffer 屬性) (torchrl.data.TensorDictReplayBuffer 屬性) StorageCheckpointerBase (類,位於 torchrl.data.replay_buffers) StorageEnsemble (類,位於 torchrl.data.replay_buffers) StorageEnsembleCheckpointer (類,位於 torchrl.data.replay_buffers) StorageEnsembleConfig (類,位於 torchrl.trainers.algorithms.configs.data) StorageEnsembleWriterConfig (類,位於 torchrl.trainers.algorithms.configs.data) submitit_delayed_launcher (類,位於 torchrl.collectors.distributed) support (torchrl.modules.TanhNormal 屬性) sync_async_collector() (位於模組 torchrl.trainers.helpers) sync_sync_collector() (位於模組 torchrl.trainers.helpers) SyncDataCollector (類,位於 torchrl.collectors) SyncDataCollectorConfig (類,位於 torchrl.trainers.algorithms.configs.collectors) T TanhDelta (類,位於 torchrl.modules) TanhModule (類,位於 torchrl.modules.tensordict_module) TanhNormal (類,位於 torchrl.modules) TanhNormalModelConfig (類,位於 torchrl.trainers.algorithms.configs.modules) target_entropy_buffer (torchrl.objectives.CrossQLoss 屬性) TargetReturn (類,位於 torchrl.envs.transforms) TargetReturnConfig (類,位於 torchrl.trainers.algorithms.configs.transforms) td0_advantage_estimate (類,位於 torchrl.objectives.value.functional) td0_return_estimate (類,位於 torchrl.objectives.value.functional) TD0Estimator (類,位於 torchrl.objectives.value) td1_advantage_estimate (類,位於 torchrl.objectives.value.functional) td1_return_estimate (類,位於 torchrl.objectives.value.functional) TD1Estimator (類,位於 torchrl.objectives.value) TD3BCLoss (類,位於 torchrl.objectives) TD3Loss (類,位於 torchrl.objectives) td_lambda_advantage_estimate (類,位於 torchrl.objectives.value.functional) td_lambda_return_estimate (類,位於 torchrl.objectives.value.functional) TDLambdaEstimator (類,位於 torchrl.objectives.value) TED2Flat (類,位於 torchrl.data) TED2Nested (類,位於 torchrl.data) TemplateTransform (類,位於 torchrl.envs.llm.transforms) TensorboardLogger() (位於模組 torchrl.record.loggers.tensorboard) TensorboardLoggerConfig (類,位於 torchrl.trainers.algorithms.configs.logging) TensorDictMap (類,位於 torchrl.data) TensorDictMaxValueWriter (類,位於 torchrl.data.replay_buffers) TensorDictModuleConfig (類,位於 torchrl.trainers.algorithms.configs.modules) TensorDictPrimer (類,位於 torchrl.envs.transforms) TensorDictPrimerConfig (類,位於 torchrl.trainers.algorithms.configs.transforms) TensorDictPrioritizedReplayBuffer (類,位於 torchrl.data) TensorDictRecorder() (位於模組 torchrl.record) TensorDictReplayBuffer (類,位於 torchrl.data) TensorDictReplayBufferConfig (類,位於 torchrl.trainers.algorithms.configs.data) TensorDictRoundRobinWriter (類,位於 torchrl.data.replay_buffers) TensorDictTokenizer (類,位於 torchrl.data) TensorMap (類,位於 torchrl.data) TensorSpec (類,位於 torchrl.data) TensorStorage (類,位於 torchrl.data.replay_buffers) TensorStorageCheckpointer (類,位於 torchrl.data.replay_buffers) TensorStorageConfig (類,位於 torchrl.trainers.algorithms.configs.data) terminated_or_truncated() (位於模組 torchrl.envs) Text (類,位於 torchrl.modules.llm) text_key (torchrl.modules.llm.RemoteTransformersWrapper 屬性) ThreadingAsyncEnvPool (類,位於 torchrl.envs) TicTacToeEnv (類,位於 torchrl.envs) TimeMaxPool (類,位於 torchrl.envs.transforms) TimeMaxPoolConfig (類,位於 torchrl.trainers.algorithms.configs.transforms) Timer (類,位於 torchrl.envs.transforms) TimerConfig (類,位於 torchrl.trainers.algorithms.configs.transforms) to() (torchrl.data.Binary 方法) (torchrl.data.BinaryDiscreteTensorSpec 方法) (torchrl.data.BinaryToDecimal 方法) (torchrl.data.Bounded 方法) (torchrl.data.BoundedTensorSpec 方法) (torchrl.data.Categorical 方法) (torchrl.data.Composite 方法) (torchrl.data.CompositeSpec 方法) (torchrl.data.DensifyReward 方法) (torchrl.data.DiscreteTensorSpec 方法) (torchrl.data.HashToInt 方法) (torchrl.data.LazyStackedCompositeSpec 方法) (torchrl.data.LazyStackedTensorSpec 方法) (torchrl.data.llm.TopKRewardSelector 方法) (torchrl.data.MultiCategorical 方法) (torchrl.data.MultiDiscreteTensorSpec 方法) (torchrl.data.MultiOneHot 方法) (torchrl.data.MultiOneHotDiscreteTensorSpec 方法) (torchrl.data.MultiStep 方法) (torchrl.data.NonTensor 方法) (torchrl.data.NonTensorSpec 方法) (torchrl.data.OneHot 方法) (torchrl.data.OneHotDiscreteTensorSpec 方法) (torchrl.data.QueryModule 方法) (torchrl.data.RandomProjectionHash 方法) (torchrl.data.SipHash 方法) (torchrl.data.Stacked 方法) (torchrl.data.StackedComposite 方法) (torchrl.data.TensorDictMap 方法) (torchrl.data.TensorSpec 方法) (torchrl.data.Unbounded 方法) (torchrl.data.UnboundedContinuous 方法) (torchrl.data.UnboundedContinuousTensorSpec 方法) (torchrl.data.UnboundedDiscrete 方法) (torchrl.data.UnboundedDiscreteTensorSpec 方法) (torchrl.envs.AsyncEnvPool 方法) (torchrl.envs.ChessEnv 方法) (torchrl.envs.EnvBase 方法) (torchrl.envs.GymLikeEnv 方法) (torchrl.envs.llm.ChatEnv 方法) (torchrl.envs.llm.DatasetChatEnv 方法) (torchrl.envs.llm.GSM8KEnv 方法) (torchrl.envs.llm.GSM8KPrepareQuestion 方法) (torchrl.envs.llm.GSM8KRewardParser 方法) (torchrl.envs.llm.IFEvalEnv 方法) (torchrl.envs.llm.IfEvalScorer 方法) (torchrl.envs.llm.LLMEnv 方法) (torchrl.envs.llm.LLMHashingEnv 方法) (torchrl.envs.llm.MLGymWrapper 方法) (torchrl.envs.llm.transforms.AddThinkingPrompt 方法) (torchrl.envs.llm.transforms.BrowserTransform 方法) (torchrl.envs.llm.transforms.DataLoadingPrimer 方法) (torchrl.envs.llm.transforms.KLComputation 方法) (torchrl.envs.llm.transforms.KLRewardTransform 方法) (torchrl.envs.llm.transforms.MCPToolTransform 方法) (torchrl.envs.llm.transforms.PolicyVersion 方法) (torchrl.envs.llm.transforms.PythonInterpreter 方法) (torchrl.envs.llm.transforms.RayDataLoadingPrimer 方法) (torchrl.envs.llm.transforms.RetrieveKL 方法) (torchrl.envs.llm.transforms.RetrieveLogProb 方法) (torchrl.envs.llm.transforms.TemplateTransform 方法) (torchrl.envs.llm.transforms.Tokenizer 方法) (torchrl.envs.LLMHashingEnv 方法) (torchrl.envs.ParallelEnv 方法) (torchrl.envs.PendulumEnv 方法) (torchrl.envs.ProcessorAsyncEnvPool 方法) (torchrl.envs.SerialEnv 方法) (torchrl.envs.ThreadingAsyncEnvPool 方法) (torchrl.envs.TicTacToeEnv 方法) (torchrl.envs.transforms.Compose 方法) (torchrl.envs.transforms.R3MTransform 方法) (torchrl.envs.transforms.rb_transforms.MultiStepTransform 方法) (torchrl.envs.transforms.TensorDictPrimer 方法) (torchrl.envs.transforms.Transform 方法) (torchrl.envs.transforms.TransformedEnv 方法) (torchrl.envs.transforms.VC1Transform 方法) (torchrl.envs.transforms.VIPTransform 方法) (torchrl.modules.llm.LLMWrapperBase 方法) (torchrl.modules.llm.TransformersWrapper 方法) (torchrl.modules.llm.vLLMWrapper 方法) (torchrl.modules.tensordict_module.SafeModule 方法) (torchrl.objectives.llm.GRPOLoss 方法) (torchrl.objectives.llm.MCAdvantage 方法) (torchrl.objectives.llm.SFTLoss 方法) to_bytestream() (torchrl.data.replay_buffers.CompressedListStorage 方法) to_categorical() (torchrl.data.Binary 方法) (torchrl.data.BinaryDiscreteTensorSpec 方法) (torchrl.data.Categorical 方法) (torchrl.data.DiscreteTensorSpec 方法) (torchrl.data.MultiCategorical 方法) (torchrl.data.MultiDiscreteTensorSpec 方法) (torchrl.data.MultiOneHot 方法) (torchrl.data.MultiOneHotDiscreteTensorSpec 方法) (torchrl.data.OneHot 方法) (torchrl.data.OneHotDiscreteTensorSpec 方法) to_categorical_spec() (torchrl.data.Binary 方法) (torchrl.data.BinaryDiscreteTensorSpec 方法) (torchrl.data.Categorical 方法) (torchrl.data.DiscreteTensorSpec 方法) (torchrl.data.MultiCategorical 方法) (torchrl.data.MultiDiscreteTensorSpec 方法) (torchrl.data.MultiOneHot 方法) (torchrl.data.MultiOneHotDiscreteTensorSpec 方法) (torchrl.data.OneHot 方法) (torchrl.data.OneHotDiscreteTensorSpec 方法) to_empty() (torchrl.data.BinaryToDecimal 方法) (torchrl.data.DensifyReward 方法) (torchrl.data.HashToInt 方法) (torchrl.data.llm.TopKRewardSelector 方法) (torchrl.data.MultiStep 方法) (torchrl.data.QueryModule 方法) (torchrl.data.RandomProjectionHash 方法) (torchrl.data.SipHash 方法) (torchrl.data.TensorDictMap 方法) (torchrl.envs.AsyncEnvPool 方法) (torchrl.envs.ChessEnv 方法) (torchrl.envs.EnvBase 方法) (torchrl.envs.GymLikeEnv 方法) (torchrl.envs.llm.ChatEnv 方法) (torchrl.envs.llm.DatasetChatEnv 方法) (torchrl.envs.llm.GSM8KEnv 方法) (torchrl.envs.llm.GSM8KPrepareQuestion 方法) (torchrl.envs.llm.GSM8KRewardParser 方法) (torchrl.envs.llm.IFEvalEnv 方法) (torchrl.envs.llm.IfEvalScorer 方法) (torchrl.envs.llm.LLMEnv 方法) (torchrl.envs.llm.LLMHashingEnv 方法) (torchrl.envs.llm.MLGymWrapper 方法) (torchrl.envs.llm.transforms.AddThinkingPrompt 方法) (torchrl.envs.llm.transforms.BrowserTransform 方法) (torchrl.envs.llm.transforms.DataLoadingPrimer 方法) (torchrl.envs.llm.transforms.KLComputation 方法) (torchrl.envs.llm.transforms.KLRewardTransform 方法) (torchrl.envs.llm.transforms.MCPToolTransform 方法) (torchrl.envs.llm.transforms.PolicyVersion 方法) (torchrl.envs.llm.transforms.PythonInterpreter 方法) (torchrl.envs.llm.transforms.RayDataLoadingPrimer 方法) (torchrl.envs.llm.transforms.RetrieveKL 方法) (torchrl.envs.llm.transforms.RetrieveLogProb 方法) (torchrl.envs.llm.transforms.TemplateTransform 方法) (torchrl.envs.llm.transforms.Tokenizer 方法) (torchrl.envs.LLMHashingEnv 方法) (torchrl.envs.ParallelEnv 方法) (torchrl.envs.PendulumEnv 方法) (torchrl.envs.ProcessorAsyncEnvPool 方法) (torchrl.envs.SerialEnv 方法) (torchrl.envs.ThreadingAsyncEnvPool 方法) (torchrl.envs.TicTacToeEnv 方法) (torchrl.envs.transforms.rb_transforms.MultiStepTransform 方法) (torchrl.modules.llm.LLMWrapperBase 方法) (torchrl.modules.llm.TransformersWrapper 方法) (torchrl.modules.llm.vLLMWrapper 方法) (torchrl.objectives.llm.GRPOLoss 方法) (torchrl.objectives.llm.MCAdvantage 方法) (torchrl.objectives.llm.SFTLoss 方法) to_numpy() (torchrl.data.Binary 方法) (torchrl.data.BinaryDiscreteTensorSpec 方法) (torchrl.data.Bounded 方法) (torchrl.data.BoundedTensorSpec 方法) (torchrl.data.Categorical 方法) (torchrl.data.Composite 方法) (torchrl.data.CompositeSpec 方法) (torchrl.data.DiscreteTensorSpec 方法) (torchrl.data.LazyStackedCompositeSpec 方法) (torchrl.data.LazyStackedTensorSpec 方法) (torchrl.data.MultiCategorical 方法) (torchrl.data.MultiDiscreteTensorSpec 方法) (torchrl.data.MultiOneHot 方法) (torchrl.data.MultiOneHotDiscreteTensorSpec 方法) (torchrl.data.NonTensor 方法) (torchrl.data.NonTensorSpec 方法) (torchrl.data.OneHot 方法) (torchrl.data.OneHotDiscreteTensorSpec 方法) (torchrl.data.Stacked 方法) (torchrl.data.StackedComposite 方法) (torchrl.data.TensorSpec 方法) (torchrl.data.Unbounded 方法) (torchrl.data.UnboundedContinuous 方法) (torchrl.data.UnboundedContinuousTensorSpec 方法) (torchrl.data.UnboundedDiscrete 方法) (torchrl.data.UnboundedDiscreteTensorSpec 方法) to_observation_norm() (torchrl.envs.transforms.VecNorm 方法) (torchrl.envs.transforms.VecNormV2 方法) to_one_hot() (torchrl.data.Binary 方法) (torchrl.data.BinaryDiscreteTensorSpec 方法) (torchrl.data.Categorical 方法) (torchrl.data.DiscreteTensorSpec 方法) (torchrl.data.MultiCategorical 方法) (torchrl.data.MultiDiscreteTensorSpec 方法) (torchrl.data.MultiOneHot 方法) (torchrl.data.MultiOneHotDiscreteTensorSpec 方法) (torchrl.data.OneHot 方法) (torchrl.data.OneHotDiscreteTensorSpec 方法) to_one_hot_spec() (torchrl.data.Binary 方法) (torchrl.data.BinaryDiscreteTensorSpec 方法) (torchrl.data.Categorical 方法) (torchrl.data.DiscreteTensorSpec 方法) (torchrl.data.MultiCategorical 方法) (torchrl.data.MultiDiscreteTensorSpec 方法) (torchrl.data.MultiOneHot 方法) (torchrl.data.MultiOneHotDiscreteTensorSpec 方法) (torchrl.data.OneHot 方法) (torchrl.data.OneHotDiscreteTensorSpec 方法) to_string() (torchrl.data.MCTSForest 方法) (torchrl.data.Tree 方法) to_tensordict() (torchrl.data.llm.ContentBase 方法) (torchrl.data.llm.History 方法) (torchrl.data.PairwiseDataset 方法) (torchrl.data.PromptData 方法) (torchrl.data.RewardData 方法) (torchrl.data.Tree 方法) (torchrl.envs.llm.IFEvalScoreData 方法) (torchrl.modules.llm.ChatHistory 方法) (torchrl.modules.llm.LogProbs 方法) (torchrl.modules.llm.Masks 方法) (torchrl.modules.llm.Text 方法) (torchrl.modules.llm.Tokens 方法) (torchrl.objectives.llm.GRPOLossOutput 方法) (torchrl.objectives.llm.SFTLossOutput 方法) TokenizedDatasetLoader (類,位於 torchrl.data) Tokenizer (類,位於 torchrl.envs.llm.transforms) (torchrl.envs.transforms 中的類) TokenizerConfig (類,位於 torchrl.trainers.algorithms.configs.transforms) Tokens (類,位於 torchrl.modules.llm) tokens_key (torchrl.modules.llm.RemoteTransformersWrapper 屬性) TopKRewardSelector (類,位於 torchrl.data.llm) total_dialog_turns (torchrl.collectors.llm.RayLLMCollector 屬性) ToTensorImage (類,位於 torchrl.envs.transforms) ToTensorImageConfig (類,位於 torchrl.trainers.algorithms.configs.transforms) train() (torchrl.data.BinaryToDecimal 方法) (torchrl.data.DensifyReward 方法) (torchrl.data.HashToInt 方法) (torchrl.data.llm.TopKRewardSelector 方法) (torchrl.data.MultiStep 方法) (torchrl.data.QueryModule 方法) (torchrl.data.RandomProjectionHash 方法) (torchrl.data.SipHash 方法) (torchrl.data.TensorDictMap 方法) (torchrl.envs.AsyncEnvPool 方法) (torchrl.envs.ChessEnv 方法) (torchrl.envs.EnvBase 方法) (torchrl.envs.GymLikeEnv 方法) (torchrl.envs.llm.ChatEnv 方法) (torchrl.envs.llm.DatasetChatEnv 方法) (torchrl.envs.llm.GSM8KEnv 方法) (torchrl.envs.llm.GSM8KPrepareQuestion 方法) (torchrl.envs.llm.GSM8KRewardParser 方法) (torchrl.envs.llm.IFEvalEnv 方法) (torchrl.envs.llm.IfEvalScorer 方法) (torchrl.envs.llm.LLMEnv 方法) (torchrl.envs.llm.LLMHashingEnv 方法) (torchrl.envs.llm.MLGymWrapper 方法) (torchrl.envs.llm.transforms.AddThinkingPrompt 方法) (torchrl.envs.llm.transforms.BrowserTransform 方法) (torchrl.envs.llm.transforms.DataLoadingPrimer 方法) (torchrl.envs.llm.transforms.KLComputation 方法) (torchrl.envs.llm.transforms.KLRewardTransform 方法) (torchrl.envs.llm.transforms.MCPToolTransform 方法) (torchrl.envs.llm.transforms.PolicyVersion 方法) (torchrl.envs.llm.transforms.PythonInterpreter 方法) (torchrl.envs.llm.transforms.RayDataLoadingPrimer 方法) (torchrl.envs.llm.transforms.RetrieveKL 方法) (torchrl.envs.llm.transforms.RetrieveLogProb 方法) (torchrl.envs.llm.transforms.TemplateTransform 方法) (torchrl.envs.llm.transforms.Tokenizer 方法) (torchrl.envs.LLMHashingEnv 方法) (torchrl.envs.ParallelEnv 方法) (torchrl.envs.PendulumEnv 方法) (torchrl.envs.ProcessorAsyncEnvPool 方法) (torchrl.envs.SerialEnv 方法) (torchrl.envs.ThreadingAsyncEnvPool 方法) (torchrl.envs.TicTacToeEnv 方法) (torchrl.envs.transforms.rb_transforms.MultiStepTransform 方法) (torchrl.envs.transforms.TransformedEnv 方法) (torchrl.modules.llm.LLMWrapperBase 方法) (torchrl.modules.llm.TransformersWrapper 方法) (torchrl.modules.llm.vLLMWrapper 方法) (torchrl.objectives.llm.GRPOLoss 方法) (torchrl.objectives.llm.MCAdvantage 方法) (torchrl.objectives.llm.SFTLoss 方法) Trainer (類,位於 torchrl.trainers) TrainerConfig (類,位於 torchrl.trainers.algorithms.configs.trainers) TrainerHookBase (類,位於 torchrl.trainers) TrajCounter (類,位於 torchrl.envs.transforms) TrajCounterConfig (類,位於 torchrl.trainers.algorithms.configs.transforms) Transform (類,位於 torchrl.envs.transforms) transform_action_spec() (torchrl.data.llm.TopKRewardSelector 方法) (torchrl.envs.llm.GSM8KPrepareQuestion 方法) (torchrl.envs.llm.GSM8KRewardParser 方法) (torchrl.envs.llm.IfEvalScorer 方法) (torchrl.envs.llm.transforms.AddThinkingPrompt 方法) (torchrl.envs.llm.transforms.BrowserTransform 方法) (torchrl.envs.llm.transforms.DataLoadingPrimer 方法) (torchrl.envs.llm.transforms.KLComputation 方法) (torchrl.envs.llm.transforms.KLRewardTransform 方法) (torchrl.envs.llm.transforms.MCPToolTransform 方法) (torchrl.envs.llm.transforms.PolicyVersion 方法) (torchrl.envs.llm.transforms.PythonInterpreter 方法) (torchrl.envs.llm.transforms.RayDataLoadingPrimer 方法) (torchrl.envs.llm.transforms.RetrieveKL 方法) (torchrl.envs.llm.transforms.RetrieveLogProb 方法) (torchrl.envs.llm.transforms.TemplateTransform 方法) (torchrl.envs.llm.transforms.Tokenizer 方法) (torchrl.envs.transforms.Compose 方法) (torchrl.envs.transforms.DeviceCastTransform 方法) (torchrl.envs.transforms.ObservationNorm 方法) (torchrl.envs.transforms.rb_transforms.MultiStepTransform 方法) (torchrl.envs.transforms.Transform 方法) (torchrl.envs.transforms.UnaryTransform 方法) (torchrl.envs.transforms.UnsqueezeTransform 方法) (torchrl.objectives.llm.MCAdvantage 方法) transform_done_spec() (torchrl.data.llm.TopKRewardSelector 方法) (torchrl.envs.llm.GSM8KPrepareQuestion 方法) (torchrl.envs.llm.GSM8KRewardParser 方法) (torchrl.envs.llm.IfEvalScorer 方法) (torchrl.envs.llm.transforms.AddThinkingPrompt 方法) (torchrl.envs.llm.transforms.BrowserTransform 方法) (torchrl.envs.llm.transforms.DataLoadingPrimer 方法) (torchrl.envs.llm.transforms.KLComputation 方法) (torchrl.envs.llm.transforms.KLRewardTransform 方法) (torchrl.envs.llm.transforms.MCPToolTransform 方法) (torchrl.envs.llm.transforms.PolicyVersion 方法) (torchrl.envs.llm.transforms.PythonInterpreter 方法) (torchrl.envs.llm.transforms.RayDataLoadingPrimer 方法) (torchrl.envs.llm.transforms.RetrieveKL 方法) (torchrl.envs.llm.transforms.RetrieveLogProb 方法) (torchrl.envs.llm.transforms.TemplateTransform 方法) (torchrl.envs.llm.transforms.Tokenizer 方法) (torchrl.envs.transforms.DeviceCastTransform 方法) (torchrl.envs.transforms.rb_transforms.MultiStepTransform 方法) (torchrl.envs.transforms.Stack 方法) (torchrl.envs.transforms.Tokenizer 方法) (torchrl.envs.transforms.Transform 方法) (torchrl.envs.transforms.UnaryTransform 方法) (torchrl.objectives.llm.MCAdvantage 方法) transform_env_batch_size() (torchrl.data.llm.TopKRewardSelector 方法) (torchrl.envs.llm.GSM8KPrepareQuestion 方法) (torchrl.envs.llm.GSM8KRewardParser 方法) (torchrl.envs.llm.IfEvalScorer 方法) (torchrl.envs.llm.transforms.AddThinkingPrompt 方法) (torchrl.envs.llm.transforms.BrowserTransform 方法) (torchrl.envs.llm.transforms.DataLoadingPrimer 方法) (torchrl.envs.llm.transforms.KLComputation 方法) (torchrl.envs.llm.transforms.KLRewardTransform 方法) (torchrl.envs.llm.transforms.MCPToolTransform 方法) (torchrl.envs.llm.transforms.PolicyVersion 方法) (torchrl.envs.llm.transforms.PythonInterpreter 方法) (torchrl.envs.llm.transforms.RayDataLoadingPrimer 方法) (torchrl.envs.llm.transforms.RetrieveKL 方法) (torchrl.envs.llm.transforms.RetrieveLogProb 方法) (torchrl.envs.llm.transforms.TemplateTransform 方法) (torchrl.envs.llm.transforms.Tokenizer 方法) (torchrl.envs.transforms.BatchSizeTransform 方法) (torchrl.envs.transforms.Compose 方法) (torchrl.envs.transforms.rb_transforms.MultiStepTransform 方法) (torchrl.envs.transforms.Transform 方法) (torchrl.objectives.llm.MCAdvantage 方法) transform_env_device() (torchrl.data.llm.TopKRewardSelector 方法) (torchrl.envs.llm.GSM8KPrepareQuestion 方法) (torchrl.envs.llm.GSM8KRewardParser 方法) (torchrl.envs.llm.IfEvalScorer 方法) (torchrl.envs.llm.transforms.AddThinkingPrompt 方法) (torchrl.envs.llm.transforms.BrowserTransform 方法) (torchrl.envs.llm.transforms.DataLoadingPrimer 方法) (torchrl.envs.llm.transforms.KLComputation 方法) (torchrl.envs.llm.transforms.KLRewardTransform 方法) (torchrl.envs.llm.transforms.MCPToolTransform 方法) (torchrl.envs.llm.transforms.PolicyVersion 方法) (torchrl.envs.llm.transforms.PythonInterpreter 方法) (torchrl.envs.llm.transforms.RayDataLoadingPrimer 方法) (torchrl.envs.llm.transforms.RetrieveKL 方法) (torchrl.envs.llm.transforms.RetrieveLogProb 方法) (torchrl.envs.llm.transforms.TemplateTransform 方法) (torchrl.envs.llm.transforms.Tokenizer 方法) (torchrl.envs.transforms.Compose 方法) (torchrl.envs.transforms.DeviceCastTransform 方法) (torchrl.envs.transforms.rb_transforms.MultiStepTransform 方法) (torchrl.envs.transforms.Transform 方法) (torchrl.objectives.llm.MCAdvantage 方法) transform_input_spec() (torchrl.data.llm.TopKRewardSelector 方法) (torchrl.envs.llm.GSM8KPrepareQuestion 方法) (torchrl.envs.llm.GSM8KRewardParser 方法) (torchrl.envs.llm.IfEvalScorer 方法) (torchrl.envs.llm.transforms.AddThinkingPrompt 方法) (torchrl.envs.llm.transforms.BrowserTransform 方法) (torchrl.envs.llm.transforms.DataLoadingPrimer 方法) (torchrl.envs.llm.transforms.KLComputation 方法) (torchrl.envs.llm.transforms.KLRewardTransform 方法) (torchrl.envs.llm.transforms.MCPToolTransform 方法) (torchrl.envs.llm.transforms.PolicyVersion 方法) (torchrl.envs.llm.transforms.PythonInterpreter 方法) (torchrl.envs.llm.transforms.RayDataLoadingPrimer 方法) (torchrl.envs.llm.transforms.RetrieveKL 方法) (torchrl.envs.llm.transforms.RetrieveLogProb 方法) (torchrl.envs.llm.transforms.TemplateTransform 方法) (torchrl.envs.llm.transforms.Tokenizer 方法) (torchrl.envs.transforms.ActionDiscretizer 方法) (torchrl.envs.transforms.BatchSizeTransform 方法) (torchrl.envs.transforms.Compose 方法) (torchrl.envs.transforms.DeviceCastTransform 方法) (torchrl.envs.transforms.DiscreteActionProjection 方法) (torchrl.envs.transforms.DTypeCastTransform 方法) (torchrl.envs.transforms.MultiAction 方法) (torchrl.envs.transforms.PermuteTransform 方法) (torchrl.envs.transforms.rb_transforms.MultiStepTransform 方法) (torchrl.envs.transforms.RemoveEmptySpecs 方法) (torchrl.envs.transforms.RenameTransform 方法) (torchrl.envs.transforms.RewardSum 方法) (torchrl.envs.transforms.Stack 方法) (torchrl.envs.transforms.StepCounter 方法) (torchrl.envs.transforms.TargetReturn 方法) (torchrl.envs.transforms.TensorDictPrimer 方法) (torchrl.envs.transforms.Tokenizer 方法) (torchrl.envs.transforms.Transform 方法) (torchrl.envs.transforms.UnaryTransform 方法) (torchrl.envs.transforms.VIPRewardTransform 方法) (torchrl.objectives.llm.MCAdvantage 方法) transform_observation_spec() (torchrl.data.llm.TopKRewardSelector 方法) (torchrl.envs.llm.GSM8KPrepareQuestion 方法) (torchrl.envs.llm.GSM8KRewardParser 方法) (torchrl.envs.llm.IfEvalScorer 方法) (torchrl.envs.llm.transforms.AddThinkingPrompt 方法) (torchrl.envs.llm.transforms.BrowserTransform 方法) (torchrl.envs.llm.transforms.DataLoadingPrimer 方法) (torchrl.envs.llm.transforms.KLComputation 方法) (torchrl.envs.llm.transforms.KLRewardTransform 方法) (torchrl.envs.llm.transforms.MCPToolTransform 方法) (torchrl.envs.llm.transforms.PolicyVersion 方法) (torchrl.envs.llm.transforms.PythonInterpreter 方法) (torchrl.envs.llm.transforms.RayDataLoadingPrimer 方法) (torchrl.envs.llm.transforms.RetrieveKL 方法) (torchrl.envs.llm.transforms.RetrieveLogProb 方法) (torchrl.envs.llm.transforms.TemplateTransform 方法) (torchrl.envs.llm.transforms.Tokenizer 方法) (torchrl.envs.transforms.CatFrames 方法) (torchrl.envs.transforms.CatTensors 方法) (torchrl.envs.transforms.CenterCrop 方法) (torchrl.envs.transforms.ClipTransform 方法) (torchrl.envs.transforms.Compose 方法) (torchrl.envs.transforms.Crop 方法) (torchrl.envs.transforms.DeviceCastTransform 方法) (torchrl.envs.transforms.DTypeCastTransform 方法) (torchrl.envs.transforms.EndOfLifeTransform 方法) (torchrl.envs.transforms.FlattenObservation 方法) (torchrl.envs.transforms.GrayScale 方法) (torchrl.envs.transforms.InitTracker 方法) (torchrl.envs.transforms.ObservationNorm 方法) (torchrl.envs.transforms.PermuteTransform 方法) (torchrl.envs.transforms.rb_transforms.MultiStepTransform 方法) (torchrl.envs.transforms.Resize 方法) (torchrl.envs.transforms.RewardSum 方法) (torchrl.envs.transforms.SignTransform 方法) (torchrl.envs.transforms.Stack 方法) (torchrl.envs.transforms.StepCounter 方法) (torchrl.envs.transforms.TargetReturn 方法) (torchrl.envs.transforms.TensorDictPrimer 方法) (torchrl.envs.transforms.TimeMaxPool 方法) (torchrl.envs.transforms.Timer 方法) (torchrl.envs.transforms.Tokenizer 方法) (torchrl.envs.transforms.ToTensorImage 方法) (torchrl.envs.transforms.TrajCounter 方法) (torchrl.envs.transforms.Transform 方法) (torchrl.envs.transforms.UnaryTransform 方法) (torchrl.envs.transforms.UnsqueezeTransform 方法) (torchrl.envs.transforms.VC1Transform 方法) (torchrl.envs.transforms.VecGymEnvTransform 方法) (torchrl.envs.transforms.VecNorm 方法) (torchrl.envs.transforms.VecNormV2 方法), [1] (torchrl.objectives.llm.MCAdvantage 方法) transform_output_spec() (torchrl.data.llm.TopKRewardSelector 方法) (torchrl.envs.llm.GSM8KPrepareQuestion 方法) (torchrl.envs.llm.GSM8KRewardParser 方法) (torchrl.envs.llm.IfEvalScorer 方法) (torchrl.envs.llm.transforms.AddThinkingPrompt 方法) (torchrl.envs.llm.transforms.BrowserTransform 方法) (torchrl.envs.llm.transforms.DataLoadingPrimer 方法) (torchrl.envs.llm.transforms.KLComputation 方法) (torchrl.envs.llm.transforms.KLRewardTransform 方法) (torchrl.envs.llm.transforms.MCPToolTransform 方法) (torchrl.envs.llm.transforms.PolicyVersion 方法) (torchrl.envs.llm.transforms.PythonInterpreter 方法) (torchrl.envs.llm.transforms.RayDataLoadingPrimer 方法) (torchrl.envs.llm.transforms.RetrieveKL 方法) (torchrl.envs.llm.transforms.RetrieveLogProb 方法) (torchrl.envs.llm.transforms.TemplateTransform 方法) (torchrl.envs.llm.transforms.Tokenizer 方法) (torchrl.envs.transforms.BatchSizeTransform 方法) (torchrl.envs.transforms.Compose 方法) (torchrl.envs.transforms.DeviceCastTransform 方法) (torchrl.envs.transforms.DTypeCastTransform 方法) (torchrl.envs.transforms.ExcludeTransform 方法) (torchrl.envs.transforms.KLRewardTransform 方法) (torchrl.envs.transforms.MultiAction 方法) (torchrl.envs.transforms.rb_transforms.MultiStepTransform 方法) (torchrl.envs.transforms.RemoveEmptySpecs 方法) (torchrl.envs.transforms.RenameTransform 方法) (torchrl.envs.transforms.SelectTransform 方法) (torchrl.envs.transforms.StepCounter 方法) (torchrl.envs.transforms.Tokenizer 方法) (torchrl.envs.transforms.Transform 方法) (torchrl.envs.transforms.UnaryTransform 方法) (torchrl.envs.transforms.VecNormV2 方法), [1] (torchrl.objectives.llm.MCAdvantage 方法) transform_reward_spec() (torchrl.data.llm.TopKRewardSelector 方法) (torchrl.envs.llm.GSM8KPrepareQuestion 方法) (torchrl.envs.llm.GSM8KRewardParser 方法) (torchrl.envs.llm.IfEvalScorer 方法) (torchrl.envs.llm.transforms.AddThinkingPrompt 方法) (torchrl.envs.llm.transforms.BrowserTransform 方法) (torchrl.envs.llm.transforms.DataLoadingPrimer 方法) (torchrl.envs.llm.transforms.KLComputation 方法) (torchrl.envs.llm.transforms.KLRewardTransform 方法) (torchrl.envs.llm.transforms.MCPToolTransform 方法) (torchrl.envs.llm.transforms.PolicyVersion 方法) (torchrl.envs.llm.transforms.PythonInterpreter 方法) (torchrl.envs.llm.transforms.RayDataLoadingPrimer 方法) (torchrl.envs.llm.transforms.RetrieveKL 方法) (torchrl.envs.llm.transforms.RetrieveLogProb 方法) (torchrl.envs.llm.transforms.TemplateTransform 方法) (torchrl.envs.llm.transforms.Tokenizer 方法) (torchrl.envs.transforms.BinarizeReward 方法) (torchrl.envs.transforms.ClipTransform 方法) (torchrl.envs.transforms.Compose 方法) (torchrl.envs.transforms.DeviceCastTransform 方法) (torchrl.envs.transforms.LineariseRewards 方法) (torchrl.envs.transforms.rb_transforms.MultiStepTransform 方法) (torchrl.envs.transforms.RewardClipping 方法) (torchrl.envs.transforms.RewardScaling 方法) (torchrl.envs.transforms.RewardSum 方法) (torchrl.envs.transforms.SignTransform 方法) (torchrl.envs.transforms.Stack 方法) (torchrl.envs.transforms.Tokenizer 方法) (torchrl.envs.transforms.Transform 方法) (torchrl.envs.transforms.UnaryTransform 方法) (torchrl.envs.transforms.UnsqueezeTransform 方法) (torchrl.envs.transforms.VecNormV2 方法), [1] (torchrl.objectives.llm.MCAdvantage 方法) transform_state_spec() (torchrl.data.llm.TopKRewardSelector 方法) (torchrl.envs.llm.GSM8KPrepareQuestion 方法) (torchrl.envs.llm.GSM8KRewardParser 方法) (torchrl.envs.llm.IfEvalScorer 方法) (torchrl.envs.llm.transforms.AddThinkingPrompt 方法) (torchrl.envs.llm.transforms.BrowserTransform 方法) (torchrl.envs.llm.transforms.DataLoadingPrimer 方法) (torchrl.envs.llm.transforms.KLComputation 方法) (torchrl.envs.llm.transforms.KLRewardTransform 方法) (torchrl.envs.llm.transforms.MCPToolTransform 方法) (torchrl.envs.llm.transforms.PolicyVersion 方法) (torchrl.envs.llm.transforms.PythonInterpreter 方法) (torchrl.envs.llm.transforms.RayDataLoadingPrimer 方法) (torchrl.envs.llm.transforms.RetrieveKL 方法) (torchrl.envs.llm.transforms.RetrieveLogProb 方法) (torchrl.envs.llm.transforms.TemplateTransform 方法) (torchrl.envs.llm.transforms.Tokenizer 方法) (torchrl.envs.transforms.Compose 方法) (torchrl.envs.transforms.DeviceCastTransform 方法) (torchrl.envs.transforms.ObservationNorm 方法) (torchrl.envs.transforms.rb_transforms.MultiStepTransform 方法) (torchrl.envs.transforms.Transform 方法) (torchrl.envs.transforms.UnaryTransform 方法) (torchrl.envs.transforms.UnsqueezeTransform 方法) (torchrl.objectives.llm.MCAdvantage 方法) TransformConfig (類,位於 torchrl.trainers.algorithms.configs.transforms) transformed_env_constructor() (位於模組 torchrl.trainers.helpers) TransformedEnv (類,位於 torchrl.envs.transforms) TransformedEnvConfig (類,位於 torchrl.trainers.algorithms.configs.envs) TransformersWrapper (類,位於 torchrl.modules.llm) Tree (類,位於 torchrl.data) TruncatedNormal (類,位於 torchrl.modules) type() (torchrl.data.BinaryToDecimal 方法) (torchrl.data.DensifyReward 方法) (torchrl.data.HashToInt 方法) (torchrl.data.llm.TopKRewardSelector 方法) (torchrl.data.MultiStep 方法) (torchrl.data.QueryModule 方法) (torchrl.data.RandomProjectionHash 方法) (torchrl.data.SipHash 方法) (torchrl.data.TensorDictMap 方法) (torchrl.envs.AsyncEnvPool 方法) (torchrl.envs.ChessEnv 方法) (torchrl.envs.EnvBase 方法) (torchrl.envs.GymLikeEnv 方法) (torchrl.envs.llm.ChatEnv 方法) (torchrl.envs.llm.DatasetChatEnv 方法) (torchrl.envs.llm.GSM8KEnv 方法) (torchrl.envs.llm.GSM8KPrepareQuestion 方法) (torchrl.envs.llm.GSM8KRewardParser 方法) (torchrl.envs.llm.IFEvalEnv 方法) (torchrl.envs.llm.IfEvalScorer 方法) (torchrl.envs.llm.LLMEnv 方法) (torchrl.envs.llm.LLMHashingEnv 方法) (torchrl.envs.llm.MLGymWrapper 方法) (torchrl.envs.llm.transforms.AddThinkingPrompt 方法) (torchrl.envs.llm.transforms.BrowserTransform 方法) (torchrl.envs.llm.transforms.DataLoadingPrimer 方法) (torchrl.envs.llm.transforms.KLComputation 方法) (torchrl.envs.llm.transforms.KLRewardTransform 方法) (torchrl.envs.llm.transforms.MCPToolTransform 方法) (torchrl.envs.llm.transforms.PolicyVersion 方法) (torchrl.envs.llm.transforms.PythonInterpreter 方法) (torchrl.envs.llm.transforms.RayDataLoadingPrimer 方法) (torchrl.envs.llm.transforms.RetrieveKL 方法) (torchrl.envs.llm.transforms.RetrieveLogProb 方法) (torchrl.envs.llm.transforms.TemplateTransform 方法) (torchrl.envs.llm.transforms.Tokenizer 方法) (torchrl.envs.LLMHashingEnv 方法) (torchrl.envs.ParallelEnv 方法) (torchrl.envs.PendulumEnv 方法) (torchrl.envs.ProcessorAsyncEnvPool 方法) (torchrl.envs.SerialEnv 方法) (torchrl.envs.ThreadingAsyncEnvPool 方法) (torchrl.envs.TicTacToeEnv 方法) (torchrl.envs.transforms.rb_transforms.MultiStepTransform 方法) (torchrl.modules.llm.LLMWrapperBase 方法) (torchrl.modules.llm.TransformersWrapper 方法) (torchrl.modules.llm.vLLMWrapper 方法) (torchrl.objectives.llm.GRPOLoss 方法) (torchrl.objectives.llm.MCAdvantage 方法) (torchrl.objectives.llm.SFTLoss 方法) type_check() (torchrl.data.Binary 方法) (torchrl.data.BinaryDiscreteTensorSpec 方法) (torchrl.data.Bounded 方法) (torchrl.data.BoundedTensorSpec 方法) (torchrl.data.Categorical 方法) (torchrl.data.Composite 方法) (torchrl.data.CompositeSpec 方法) (torchrl.data.DiscreteTensorSpec 方法) (torchrl.data.LazyStackedCompositeSpec 方法) (torchrl.data.LazyStackedTensorSpec 方法) (torchrl.data.MultiCategorical 方法) (torchrl.data.MultiDiscreteTensorSpec 方法) (torchrl.data.MultiOneHot 方法) (torchrl.data.MultiOneHotDiscreteTensorSpec 方法) (torchrl.data.NonTensor 方法) (torchrl.data.NonTensorSpec 方法) (torchrl.data.OneHot 方法) (torchrl.data.OneHotDiscreteTensorSpec 方法) (torchrl.data.Stacked 方法) (torchrl.data.StackedComposite 方法) (torchrl.data.TensorSpec 方法) (torchrl.data.Unbounded 方法) (torchrl.data.UnboundedContinuous 方法) (torchrl.data.UnboundedContinuousTensorSpec 方法) (torchrl.data.UnboundedDiscrete 方法) (torchrl.data.UnboundedDiscreteTensorSpec 方法) U UnaryTransform (類,位於 torchrl.envs.transforms) UnaryTransformConfig (類,位於 torchrl.trainers.algorithms.configs.transforms) unbind() (torchrl.data.llm.ContentBase 方法) (torchrl.data.llm.History 方法) (torchrl.data.PairwiseDataset 方法) (torchrl.data.PromptData 方法) (torchrl.data.RewardData 方法) (torchrl.data.Tree 方法) (torchrl.envs.llm.IFEvalScoreData 方法) (torchrl.modules.llm.ChatHistory 方法) (torchrl.modules.llm.LogProbs 方法) (torchrl.modules.llm.Masks 方法) (torchrl.modules.llm.Text 方法) (torchrl.modules.llm.Tokens 方法) (torchrl.objectives.llm.GRPOLossOutput 方法) (torchrl.objectives.llm.SFTLossOutput 方法) Unbounded (類,位於 torchrl.data) UnboundedContinuous (類,位於 torchrl.data) UnboundedContinuousTensorSpec (類,位於 torchrl.data) UnboundedDiscrete (類,位於 torchrl.data) UnboundedDiscreteTensorSpec (類,位於 torchrl.data) unflatten() (torchrl.data.Binary 方法) (torchrl.data.BinaryDiscreteTensorSpec 方法) (torchrl.data.Bounded 方法) (torchrl.data.BoundedTensorSpec 方法) (torchrl.data.Categorical 方法) (torchrl.data.Composite 方法) (torchrl.data.CompositeSpec 方法) (torchrl.data.DiscreteTensorSpec 方法) (torchrl.data.LazyStackedCompositeSpec 方法) (torchrl.data.LazyStackedTensorSpec 方法) (torchrl.data.MultiCategorical 方法) (torchrl.data.MultiDiscreteTensorSpec 方法) (torchrl.data.MultiOneHot 方法) (torchrl.data.MultiOneHotDiscreteTensorSpec 方法) (torchrl.data.NonTensor 方法) (torchrl.data.NonTensorSpec 方法) (torchrl.data.OneHot 方法) (torchrl.data.OneHotDiscreteTensorSpec 方法) (torchrl.data.Stacked 方法) (torchrl.data.StackedComposite 方法) (torchrl.data.TensorSpec 方法) (torchrl.data.Unbounded 方法) (torchrl.data.UnboundedContinuous 方法) (torchrl.data.UnboundedContinuousTensorSpec 方法) (torchrl.data.UnboundedDiscrete 方法) (torchrl.data.UnboundedDiscreteTensorSpec 方法) unfreeze() (torchrl.envs.transforms.VecNorm 方法) (torchrl.envs.transforms.VecNormV2 方法), [1] UnityMLAgentsEnv() (位於模組 torchrl.envs) UnityMLAgentsEnvConfig (類,位於 torchrl.trainers.algorithms.configs.envs_libs) UnityMLAgentsWrapper() (位於模組 torchrl.envs) unlock_() (torchrl.data.Composite 方法) (torchrl.data.CompositeSpec 方法) (torchrl.data.LazyStackedCompositeSpec 方法) (torchrl.data.StackedComposite 方法) unsqueeze() (torchrl.data.Binary 方法) (torchrl.data.BinaryDiscreteTensorSpec 方法) (torchrl.data.Bounded 方法) (torchrl.data.BoundedTensorSpec 方法) (torchrl.data.Categorical 方法) (torchrl.data.Composite 方法) (torchrl.data.CompositeSpec 方法) (torchrl.data.DiscreteTensorSpec 方法) (torchrl.data.LazyStackedCompositeSpec 方法) (torchrl.data.LazyStackedTensorSpec 方法) (torchrl.data.MultiCategorical 方法) (torchrl.data.MultiDiscreteTensorSpec 方法) (torchrl.data.MultiOneHot 方法) (torchrl.data.MultiOneHotDiscreteTensorSpec 方法) (torchrl.data.NonTensor 方法) (torchrl.data.NonTensorSpec 方法) (torchrl.data.OneHot 方法) (torchrl.data.OneHotDiscreteTensorSpec 方法) (torchrl.data.Stacked 方法) (torchrl.data.StackedComposite 方法) (torchrl.data.TensorSpec 方法) (torchrl.data.Unbounded 方法) (torchrl.data.UnboundedContinuous 方法) (torchrl.data.UnboundedContinuousTensorSpec 方法) (torchrl.data.UnboundedDiscrete 方法) (torchrl.data.UnboundedDiscreteTensorSpec 方法) UnsqueezeTransform (類,位於 torchrl.envs.transforms) UnsqueezeTransformConfig (類,位於 torchrl.trainers.algorithms.configs.transforms) update() (torchrl.data.AdaptiveKLController 方法) update_kwargs() (torchrl.envs.ParallelEnv 方法) (torchrl.envs.SerialEnv 方法) update_mask() (torchrl.data.Binary 方法) (torchrl.data.BinaryDiscreteTensorSpec 方法) (torchrl.data.Categorical 方法) (torchrl.data.DiscreteTensorSpec 方法) (torchrl.data.MultiCategorical 方法) (torchrl.data.MultiDiscreteTensorSpec 方法) (torchrl.data.MultiOneHot 方法) (torchrl.data.MultiOneHotDiscreteTensorSpec 方法) (torchrl.data.OneHot 方法) (torchrl.data.OneHotDiscreteTensorSpec 方法) update_policy_weights_() (torchrl.collectors.aSyncDataCollector 方法) (torchrl.collectors.DataCollectorBase 方法) (torchrl.collectors.distributed.DistributedDataCollector 方法) (torchrl.collectors.distributed.DistributedSyncDataCollector 方法) (torchrl.collectors.distributed.RayCollector 方法) (torchrl.collectors.distributed.RPCDataCollector 方法) (torchrl.collectors.llm.LLMCollector 方法) (torchrl.collectors.llm.RayLLMCollector 方法) (torchrl.collectors.MultiaSyncDataCollector 方法) (torchrl.collectors.MultiSyncDataCollector 方法) (torchrl.collectors.SyncDataCollector 方法) update_priority() (torchrl.data.replay_buffers.PrioritizedSampler 方法) (torchrl.data.replay_buffers.PrioritizedSliceSampler 方法) update_weights() (torchrl.collectors.distributed.DistributedWeightUpdater 方法) (torchrl.collectors.distributed.RPCWeightUpdater 方法) (torchrl.modules.llm.AsyncVLLM 方法) UpdateWeights (類,位於 torchrl.trainers) V valid_paths() (torchrl.data.Tree 方法) value_estimate() (torchrl.objectives.value.GAE 方法) (torchrl.objectives.value.TD0Estimator 方法) (torchrl.objectives.value.TD1Estimator 方法) (torchrl.objectives.value.TDLambdaEstimator 方法) (torchrl.objectives.value.ValueEstimatorBase 方法) value_estimator (torchrl.objectives.llm.GRPOLoss 屬性) (torchrl.objectives.llm.SFTLoss 屬性) (torchrl.objectives.LossModule 屬性) value_loss() (torchrl.objectives.SACLoss 方法) ValueEstimatorBase (類,位於 torchrl.objectives.value) ValueEstimators (類,位於 torchrl.objectives) ValueModelConfig (類,位於 torchrl.trainers.algorithms.configs.modules) ValueOperator (類,位於 torchrl.modules.tensordict_module) values() (torchrl.data.Composite 方法) (torchrl.data.CompositeSpec 方法) (torchrl.data.LazyStackedCompositeSpec 方法) (torchrl.data.StackedComposite 方法) VanillaWeightUpdater (類,位於 torchrl.collectors) VC1Transform (類,位於 torchrl.envs.transforms) VC1TransformConfig (類,位於 torchrl.trainers.algorithms.configs.transforms) VD4RLExperienceReplay (類,位於 torchrl.data.datasets) VDNMixer (類,位於 torchrl.modules) vec_generalized_advantage_estimate (類,位於 torchrl.objectives.value.functional) vec_td1_advantage_estimate (類,位於 torchrl.objectives.value.functional) vec_td1_return_estimate (類,位於 torchrl.objectives.value.functional) vec_td_lambda_advantage_estimate (類,位於 torchrl.objectives.value.functional) vec_td_lambda_return_estimate (類,位於 torchrl.objectives.value.functional) VecGymEnvTransform (類,位於 torchrl.envs.transforms) VecGymEnvTransformConfig (類,位於 torchrl.trainers.algorithms.configs.transforms) VecNorm (類,位於 torchrl.envs.transforms) VecNormConfig (類,位於 torchrl.trainers.algorithms.configs.transforms) VecNormV2 (類,位於 torchrl.envs.transforms) VecNormV2Config (類,位於 torchrl.trainers.algorithms.configs.transforms) version (torchrl.envs.llm.transforms.PolicyVersion 屬性) vertices() (torchrl.data.Tree 方法) VideoRecorder() (位於模組 torchrl.record) view() (torchrl.data.Binary 方法) (torchrl.data.BinaryDiscreteTensorSpec 方法) (torchrl.data.Bounded 方法) (torchrl.data.BoundedTensorSpec 方法) (torchrl.data.Categorical 方法) (torchrl.data.Composite 方法) (torchrl.data.CompositeSpec 方法) (torchrl.data.DiscreteTensorSpec 方法) (torchrl.data.LazyStackedCompositeSpec 方法) (torchrl.data.LazyStackedTensorSpec 方法) (torchrl.data.MultiCategorical 方法) (torchrl.data.MultiDiscreteTensorSpec 方法) (torchrl.data.MultiOneHot 方法) (torchrl.data.MultiOneHotDiscreteTensorSpec 方法) (torchrl.data.NonTensor 方法) (torchrl.data.NonTensorSpec 方法) (torchrl.data.OneHot 方法) (torchrl.data.OneHotDiscreteTensorSpec 方法) (torchrl.data.Stacked 方法) (torchrl.data.StackedComposite 方法) (torchrl.data.TensorSpec 方法) (torchrl.data.Unbounded 方法) (torchrl.data.UnboundedContinuous 方法) (torchrl.data.UnboundedContinuousTensorSpec 方法) (torchrl.data.UnboundedDiscrete 方法) (torchrl.data.UnboundedDiscreteTensorSpec 方法) VIPRewardTransform (類,位於 torchrl.envs.transforms) VIPRewardTransformConfig (類,位於 torchrl.trainers.algorithms.configs.transforms) VIPTransform (類,位於 torchrl.envs.transforms) VIPTransformConfig (類,位於 torchrl.trainers.algorithms.configs.transforms) visits (torchrl.data.Tree 屬性) vLLMUpdater (類,位於 torchrl.collectors.llm) vLLMUpdaterV2 (類,位於 torchrl.collectors.llm) vLLMWrapper (類,位於 torchrl.modules.llm) vmap_randomness (torchrl.objectives.llm.GRPOLoss 屬性) (torchrl.objectives.llm.SFTLoss 屬性) (torchrl.objectives.LossModule 屬性) VmapModule (類,位於 torchrl.modules) VmasEnv() (位於模組 torchrl.envs) VmasEnvConfig (類,位於 torchrl.trainers.algorithms.configs.envs_libs) VmasWrapper() (位於模組 torchrl.envs) W WandbLogger() (位於模組 torchrl.record.loggers.wandb) WandbLoggerConfig (類,位於 torchrl.trainers.algorithms.configs.logging) weight_updater (torchrl.collectors.llm.RayLLMCollector 屬性) WeightUpdaterBase (類,位於 torchrl.collectors) WorldModelWrapper (類,位於 torchrl.modules.tensordict_module) write_count (torchrl.data.datasets.AtariDQNExperienceReplay 屬性) (torchrl.data.datasets.BaseDatasetExperienceReplay 屬性) (torchrl.data.datasets.D4RLExperienceReplay 屬性) (torchrl.data.datasets.GenDGRLExperienceReplay 屬性) (torchrl.data.datasets.MinariExperienceReplay 屬性) (torchrl.data.datasets.OpenMLExperienceReplay 屬性) (torchrl.data.datasets.OpenXExperienceReplay 屬性) (torchrl.data.datasets.RobosetExperienceReplay 屬性) (torchrl.data.datasets.VD4RLExperienceReplay 屬性) (torchrl.data.PrioritizedReplayBuffer 屬性) (torchrl.data.RayReplayBuffer 屬性) (torchrl.data.RemoteTensorDictReplayBuffer 屬性) (torchrl.data.replay_buffers.ReplayBufferEnsemble 屬性) (torchrl.data.ReplayBuffer 屬性) (torchrl.data.TensorDictPrioritizedReplayBuffer 屬性) (torchrl.data.TensorDictReplayBuffer 屬性) Writer (類,位於 torchrl.data.replay_buffers) writer (torchrl.data.datasets.AtariDQNExperienceReplay 屬性) (torchrl.data.datasets.BaseDatasetExperienceReplay 屬性) (torchrl.data.datasets.D4RLExperienceReplay 屬性) (torchrl.data.datasets.GenDGRLExperienceReplay 屬性) (torchrl.data.datasets.MinariExperienceReplay 屬性) (torchrl.data.datasets.OpenMLExperienceReplay 屬性) (torchrl.data.datasets.OpenXExperienceReplay 屬性) (torchrl.data.datasets.RobosetExperienceReplay 屬性) (torchrl.data.datasets.VD4RLExperienceReplay 屬性) (torchrl.data.PrioritizedReplayBuffer 屬性) (torchrl.data.RayReplayBuffer 屬性) (torchrl.data.RemoteTensorDictReplayBuffer 屬性) (torchrl.data.replay_buffers.ReplayBufferEnsemble 屬性) (torchrl.data.ReplayBuffer 屬性) (torchrl.data.TensorDictPrioritizedReplayBuffer 屬性) (torchrl.data.TensorDictReplayBuffer 屬性) WriterEnsemble (類,位於 torchrl.data.replay_buffers) X xpu() (torchrl.data.BinaryToDecimal 方法) (torchrl.data.DensifyReward 方法) (torchrl.data.HashToInt 方法) (torchrl.data.llm.TopKRewardSelector 方法) (torchrl.data.MultiStep 方法) (torchrl.data.QueryModule 方法) (torchrl.data.RandomProjectionHash 方法) (torchrl.data.SipHash 方法) (torchrl.data.TensorDictMap 方法) (torchrl.envs.AsyncEnvPool 方法) (torchrl.envs.ChessEnv 方法) (torchrl.envs.EnvBase 方法) (torchrl.envs.GymLikeEnv 方法) (torchrl.envs.llm.ChatEnv 方法) (torchrl.envs.llm.DatasetChatEnv 方法) (torchrl.envs.llm.GSM8KEnv 方法) (torchrl.envs.llm.GSM8KPrepareQuestion 方法) (torchrl.envs.llm.GSM8KRewardParser 方法) (torchrl.envs.llm.IFEvalEnv 方法) (torchrl.envs.llm.IfEvalScorer 方法) (torchrl.envs.llm.LLMEnv 方法) (torchrl.envs.llm.LLMHashingEnv 方法) (torchrl.envs.llm.MLGymWrapper 方法) (torchrl.envs.llm.transforms.AddThinkingPrompt 方法) (torchrl.envs.llm.transforms.BrowserTransform 方法) (torchrl.envs.llm.transforms.DataLoadingPrimer 方法) (torchrl.envs.llm.transforms.KLComputation 方法) (torchrl.envs.llm.transforms.KLRewardTransform 方法) (torchrl.envs.llm.transforms.MCPToolTransform 方法) (torchrl.envs.llm.transforms.PolicyVersion 方法) (torchrl.envs.llm.transforms.PythonInterpreter 方法) (torchrl.envs.llm.transforms.RayDataLoadingPrimer 方法) (torchrl.envs.llm.transforms.RetrieveKL 方法) (torchrl.envs.llm.transforms.RetrieveLogProb 方法) (torchrl.envs.llm.transforms.TemplateTransform 方法) (torchrl.envs.llm.transforms.Tokenizer 方法) (torchrl.envs.LLMHashingEnv 方法) (torchrl.envs.ParallelEnv 方法) (torchrl.envs.PendulumEnv 方法) (torchrl.envs.ProcessorAsyncEnvPool 方法) (torchrl.envs.SerialEnv 方法) (torchrl.envs.ThreadingAsyncEnvPool 方法) (torchrl.envs.TicTacToeEnv 方法) (torchrl.envs.transforms.rb_transforms.MultiStepTransform 方法) (torchrl.modules.llm.LLMWrapperBase 方法) (torchrl.modules.llm.TransformersWrapper 方法) (torchrl.modules.llm.vLLMWrapper 方法) (torchrl.objectives.llm.GRPOLoss 方法) (torchrl.objectives.llm.MCAdvantage 方法) (torchrl.objectives.llm.SFTLoss 方法) Z zero() (torchrl.data.Binary 方法) (torchrl.data.BinaryDiscreteTensorSpec 方法) (torchrl.data.Bounded 方法) (torchrl.data.BoundedTensorSpec 方法) (torchrl.data.Categorical 方法) (torchrl.data.Composite 方法) (torchrl.data.CompositeSpec 方法) (torchrl.data.DiscreteTensorSpec 方法) (torchrl.data.LazyStackedCompositeSpec 方法) (torchrl.data.LazyStackedTensorSpec 方法) (torchrl.data.MultiCategorical 方法) (torchrl.data.MultiDiscreteTensorSpec 方法) (torchrl.data.MultiOneHot 方法) (torchrl.data.MultiOneHotDiscreteTensorSpec 方法) (torchrl.data.NonTensor 方法) (torchrl.data.NonTensorSpec 方法) (torchrl.data.OneHot 方法) (torchrl.data.OneHotDiscreteTensorSpec 方法) (torchrl.data.Stacked 方法) (torchrl.data.StackedComposite 方法) (torchrl.data.TensorSpec 方法) (torchrl.data.Unbounded 方法) (torchrl.data.UnboundedContinuous 方法) (torchrl.data.UnboundedContinuousTensorSpec 方法) (torchrl.data.UnboundedDiscrete 方法) (torchrl.data.UnboundedDiscreteTensorSpec 方法) zero_grad() (torchrl.data.BinaryToDecimal 方法) (torchrl.data.DensifyReward 方法) (torchrl.data.HashToInt 方法) (torchrl.data.llm.TopKRewardSelector 方法) (torchrl.data.MultiStep 方法) (torchrl.data.QueryModule 方法) (torchrl.data.RandomProjectionHash 方法) (torchrl.data.SipHash 方法) (torchrl.data.TensorDictMap 方法) (torchrl.envs.AsyncEnvPool 方法) (torchrl.envs.ChessEnv 方法) (torchrl.envs.EnvBase 方法) (torchrl.envs.GymLikeEnv 方法) (torchrl.envs.llm.ChatEnv 方法) (torchrl.envs.llm.DatasetChatEnv 方法) (torchrl.envs.llm.GSM8KEnv 方法) (torchrl.envs.llm.GSM8KPrepareQuestion 方法) (torchrl.envs.llm.GSM8KRewardParser 方法) (torchrl.envs.llm.IFEvalEnv 方法) (torchrl.envs.llm.IfEvalScorer 方法) (torchrl.envs.llm.LLMEnv 方法) (torchrl.envs.llm.LLMHashingEnv 方法) (torchrl.envs.llm.MLGymWrapper 方法) (torchrl.envs.llm.transforms.AddThinkingPrompt 方法) (torchrl.envs.llm.transforms.BrowserTransform 方法) (torchrl.envs.llm.transforms.DataLoadingPrimer 方法) (torchrl.envs.llm.transforms.KLComputation 方法) (torchrl.envs.llm.transforms.KLRewardTransform 方法) (torchrl.envs.llm.transforms.MCPToolTransform 方法) (torchrl.envs.llm.transforms.PolicyVersion 方法) (torchrl.envs.llm.transforms.PythonInterpreter 方法) (torchrl.envs.llm.transforms.RayDataLoadingPrimer 方法) (torchrl.envs.llm.transforms.RetrieveKL 方法) (torchrl.envs.llm.transforms.RetrieveLogProb 方法) (torchrl.envs.llm.transforms.TemplateTransform 方法) (torchrl.envs.llm.transforms.Tokenizer 方法) (torchrl.envs.LLMHashingEnv 方法) (torchrl.envs.ParallelEnv 方法) (torchrl.envs.PendulumEnv 方法) (torchrl.envs.ProcessorAsyncEnvPool 方法) (torchrl.envs.SerialEnv 方法) (torchrl.envs.ThreadingAsyncEnvPool 方法) (torchrl.envs.TicTacToeEnv 方法) (torchrl.envs.transforms.rb_transforms.MultiStepTransform 方法) (torchrl.modules.llm.LLMWrapperBase 方法) (torchrl.modules.llm.TransformersWrapper 方法) (torchrl.modules.llm.vLLMWrapper 方法) (torchrl.objectives.llm.GRPOLoss 方法) (torchrl.objectives.llm.MCAdvantage 方法) (torchrl.objectives.llm.SFTLoss 方法) zeros() (torchrl.data.Binary 方法) (torchrl.data.BinaryDiscreteTensorSpec 方法) (torchrl.data.Bounded 方法) (torchrl.data.BoundedTensorSpec 方法) (torchrl.data.Categorical 方法) (torchrl.data.Composite 方法) (torchrl.data.CompositeSpec 方法) (torchrl.data.DiscreteTensorSpec 方法) (torchrl.data.LazyStackedCompositeSpec 方法) (torchrl.data.LazyStackedTensorSpec 方法) (torchrl.data.MultiCategorical 方法) (torchrl.data.MultiDiscreteTensorSpec 方法) (torchrl.data.MultiOneHot 方法) (torchrl.data.MultiOneHotDiscreteTensorSpec 方法) (torchrl.data.NonTensor 方法) (torchrl.data.NonTensorSpec 方法) (torchrl.data.OneHot 方法) (torchrl.data.OneHotDiscreteTensorSpec 方法) (torchrl.data.Stacked 方法) (torchrl.data.StackedComposite 方法) (torchrl.data.TensorSpec 方法) (torchrl.data.Unbounded 方法) (torchrl.data.UnboundedContinuous 方法) (torchrl.data.UnboundedContinuousTensorSpec 方法) (torchrl.data.UnboundedDiscrete 方法) (torchrl.data.UnboundedDiscreteTensorSpec 方法)