@@ -63,30 +63,38 @@ class DBGFlowNet(PFBasedGFlowNet[Transitions]):
         log_reward_clip_min: If finite, clips log rewards to this value.
         safe_log_prob_min: If True, uses -1e10 as the minimum log probability value
             to avoid numerical instability, otherwise uses -1e38.
+        constant_pb: Whether to ignore the backward policy estimator, e.g., when
+            the GFlowNet DAG is a tree and pb is therefore always 1.
     """
 
     def __init__(
         self,
         pf: Estimator,
-        pb: Estimator,
+        pb: Estimator | None,
         logF: ScalarEstimator | ConditionalScalarEstimator,
         forward_looking: bool = False,
         log_reward_clip_min: float = -float("inf"),
         safe_log_prob_min: bool = True,
+        constant_pb: bool = False,
     ) -> None:
         """Initializes a DBGFlowNet instance.
 
         Args:
             pf: The forward policy estimator.
-            pb: The backward policy estimator.
+            pb: The backward policy estimator, or None if the GFlowNet DAG is a
+                tree, in which case pb is always 1.
             logF: A ScalarEstimator or ConditionalScalarEstimator for estimating the log
                 flow of the states.
             forward_looking: Whether to use the forward-looking GFN loss.
             log_reward_clip_min: If finite, clips log rewards to this value.
             safe_log_prob_min: If True, uses -1e10 as the minimum log probability value
                 to avoid numerical instability, otherwise uses -1e38.
+            constant_pb: Whether to ignore the backward policy estimator, e.g.,
+                when the GFlowNet DAG is a tree and pb is therefore always 1.
+                Must be set explicitly by the user to ensure that pb is an
+                Estimator except in this special case.
         """
-        super().__init__(pf, pb)
+        super().__init__(pf, pb, constant_pb=constant_pb)
         assert any(
             isinstance(logF, cls)
             for cls in [ScalarEstimator, ConditionalScalarEstimator]
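The two new arguments are meant to travel together: `pb=None` is only sensible when `constant_pb=True`. A minimal usage sketch under that assumption (the estimator variables below are hypothetical placeholders, not defined in this diff):

```python
# Hypothetical usage of the new signature: for a tree-structured DAG the
# backward policy is identically 1, so no pb estimator is needed.
gfn = DBGFlowNet(
    pf=pf_estimator,      # an Estimator for the forward policy (placeholder)
    pb=None,              # no backward estimator for a tree-structured DAG
    logF=logF_estimator,  # a ScalarEstimator for log state flows (placeholder)
    constant_pb=True,     # must be passed explicitly alongside pb=None
)
```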
@@ -285,15 +293,19 @@ class ModifiedDBGFlowNet(PFBasedGFlowNet[Transitions]):
 
     Attributes:
         pf: The forward policy estimator.
-        pb: The backward policy estimator.
-        logF: A ScalarEstimator or ConditionalScalarEstimator for estimating the log
-            flow of the states.
-        forward_looking: Whether to use the forward-looking GFN loss.
-        log_reward_clip_min: If finite, clips log rewards to this value.
-        safe_log_prob_min: If True, uses -1e10 as the minimum log probability value
-            to avoid numerical instability, otherwise uses -1e38.
+        pb: The backward policy estimator, or None if the GFlowNet DAG is a tree
+            and pb is therefore always 1.
+        constant_pb: Whether to ignore the backward policy estimator, e.g., when the
+            GFlowNet DAG is a tree and pb is therefore always 1. Must be set explicitly
+            by the user to ensure that pb is an Estimator except in this special case.
     """
 
+    def __init__(
+        self, pf: Estimator, pb: Estimator | None, constant_pb: bool = False
+    ) -> None:
+        """Initializes a ModifiedDBGFlowNet instance."""
+        super().__init__(pf, pb, constant_pb=constant_pb)
+
     def get_scores(
         self, transitions: Transitions, recalculate_all_logprobs: bool = True
     ) -> torch.Tensor:
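Both constructors forward `constant_pb` to `PFBasedGFlowNet.__init__`, which is not shown in this diff. Given the docstrings' note that the flag must be set explicitly by the user, presumably the base class enforces the pairing; one plausible sketch of that check (an assumption, not code from this PR):

```python
# Hypothetical validation inside PFBasedGFlowNet.__init__ (outside this diff):
# pb may only be omitted when the caller explicitly opts in via constant_pb.
if pb is None and not constant_pb:
    raise ValueError(
        "pb=None requires constant_pb=True (e.g., a tree-structured DAG)."
    )
```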
@@ -371,18 +383,23 @@ def get_scores(
 
         non_exit_actions = actions[~actions.is_exit]
 
-        if transitions.conditioning is not None:
-            with has_conditioning_exception_handler("pb", self.pb):
-                module_output = self.pb(
-                    valid_next_states, transitions.conditioning[mask]
-                )
+        if self.pb is not None:
+            if transitions.conditioning is not None:
+                with has_conditioning_exception_handler("pb", self.pb):
+                    module_output = self.pb(
+                        valid_next_states, transitions.conditioning[mask]
+                    )
+            else:
+                with no_conditioning_exception_handler("pb", self.pb):
+                    module_output = self.pb(valid_next_states)
+
+            valid_log_pb_actions = self.pb.to_probability_distribution(
+                valid_next_states, module_output
+            ).log_prob(non_exit_actions.tensor)
         else:
-            with no_conditioning_exception_handler("pb", self.pb):
-                module_output = self.pb(valid_next_states)
-
-        valid_log_pb_actions = self.pb.to_probability_distribution(
-            valid_next_states, module_output
-        ).log_prob(non_exit_actions.tensor)
+            # If pb is None, we assume that the GFlowNet DAG is a tree, and
+            # therefore the backward policy probability is always 1 (log probs are 0).
+            valid_log_pb_actions = torch.zeros_like(valid_log_pf_s_exit)
 
         preds = all_log_rewards[:, 0] + valid_log_pf_actions + valid_log_pf_s_prime_exit
         targets = all_log_rewards[:, 1] + valid_log_pb_actions + valid_log_pf_s_exit
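In the `else` branch, `torch.zeros_like(valid_log_pf_s_exit)` is just a convenient way to get a zero tensor with the correct batch shape, dtype, and device: in a tree every state has a unique parent, so P_B(s | s') = 1 and log P_B = 0 for every valid transition, and `targets` reduces to the log reward plus the forward exit term. A self-contained check with stand-in tensors (shapes are illustrative only, not from this PR):

```python
import torch

# Stand-ins for the tensors in get_scores; only the shared batch shape matters.
all_log_rewards = torch.randn(4, 2)
valid_log_pf_s_exit = torch.randn(4)

# Tree-structured DAG: log P_B == log 1 == 0 for every valid transition.
valid_log_pb_actions = torch.zeros_like(valid_log_pf_s_exit)

targets = all_log_rewards[:, 1] + valid_log_pb_actions + valid_log_pf_s_exit
assert torch.equal(targets, all_log_rewards[:, 1] + valid_log_pf_s_exit)
```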