# Theory Introduction

Our ConFIG method aims to eliminate conflicts among multiple loss terms in gradient-descent optimization.

## ConFIG

Generically, we consider an optimization procedure with a set of $m$ individual loss functions, i.e., $\{\mathcal{L}_1,\mathcal{L}_2,\cdots,\mathcal{L}_m\}$. Let $\{\mathbf{g}_1,\mathbf{g}_2, \cdots, \mathbf{g}_m\}$ denote the individual gradients corresponding to each of the loss functions. A gradient-descent step with gradient $\mathbf{g}_c$ conflicts with the decrease of $\mathcal{L}_i$ if $\mathbf{g}_i^\top \mathbf{g}_c$ is **negative**. Thus, to ensure that all losses decrease simultaneously along $\mathbf{g}_c$, all $m$ components of $[\mathbf{g}_1,\mathbf{g}_2,\cdots, \mathbf{g}_m]^\top\mathbf{g}_c$ should be positive. This condition is fulfilled by setting $\mathbf{g}_c = [\mathbf{g}_1,\mathbf{g}_2,\cdots, \mathbf{g}_m]^{-\top} \mathbf{w}$, where $\mathbf{w}=[w_1,w_2,\cdots,w_m]$ is a vector with $m$ positive components and $M^{-\top}$ denotes the pseudoinverse of the transposed matrix $M^{\top}$.

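As a quick illustration, here is a minimal NumPy sketch of this construction with arbitrary toy gradients (not the library implementation; it assumes the $m$ gradients are linearly independent so that the pseudoinverse acts as a right inverse):

```python
import numpy as np

# m = 3 toy loss-specific gradients in an n = 4 dimensional parameter space,
# stacked as the rows of G, i.e., G = [g_1, g_2, g_3]^T.
G = np.array([[ 1.0,  0.0, 0.0, 0.5],
              [-0.5,  1.0, 0.0, 0.0],
              [ 0.2, -0.3, 1.0, 0.0]])

print(G[0] @ G[1])            # -0.5 < 0: g_1 and g_2 conflict with each other

w = np.ones(G.shape[0])       # any vector with m positive components
g_c = np.linalg.pinv(G) @ w   # g_c = [g_1, ..., g_m]^{-T} w

print(G @ g_c)                # [1. 1. 1.]: every loss decreases along g_c
```
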
Although a positive $\mathbf{w}$ vector guarantees a conflict-free update direction for all losses, the specific value of $w_i$ further influences the exact direction of $\mathbf{g}_c$. To facilitate determining $\mathbf{w}$, we reformulate $\mathbf{g}_c$ as $\mathbf{g}_c=k[\mathcal{U}(\mathbf{g}_1),\mathcal{U}(\mathbf{g}_2),\cdots, \mathcal{U}(\mathbf{g}_m)]^{-\top} \mathbf{\hat{w}}$, where $\mathcal{U}(\mathbf{g}_i)=\mathbf{g}_i/(|\mathbf{g}_i|+\varepsilon)$ is a normalization operator and $k>0$. Now, $k$ controls the length of $\mathbf{g}_c$, and the ratio of $\mathbf{\hat{w}}$'s components corresponds to the ratio of $\mathbf{g}_c$'s projections onto each loss-specific $\mathbf{g}_i$, i.e., $|\mathbf{g}_c|\mathcal{S}_c(\mathbf{g}_c,\mathbf{g}_i)$, where $\mathcal{S}_c(\mathbf{g}_i,\mathbf{g}_j)=\mathbf{g}_i^\top\mathbf{g}_j/(|\mathbf{g}_i||\mathbf{g}_j|+\varepsilon)$ is the cosine-similarity operator:

$$
\frac{|\mathbf{g}_c|\mathcal{S}_c(\mathbf{g}_c,\mathbf{g}_i)}{|\mathbf{g}_c|\mathcal{S}_c(\mathbf{g}_c,\mathbf{g}_j)}
=\frac{\mathcal{S}_c(\mathbf{g}_c,\mathbf{g}_i)}{\mathcal{S}_c(\mathbf{g}_c,\mathbf{g}_j)}
=\frac{\mathcal{S}_c(\mathbf{g}_c,k\mathcal{U}(\mathbf{g}_i))}{\mathcal{S}_c(\mathbf{g}_c,k\mathcal{U}(\mathbf{g}_j))}
=\frac{[k\mathcal{U}(\mathbf{g}_i)]^\top \mathbf{g}_c}{[k\mathcal{U}(\mathbf{g}_j)]^\top \mathbf{g}_c}
=\frac{\hat{w}_i}{\hat{w}_j}
\quad \forall i,j \in [1,m].
$$

We call $\mathbf{\hat{w}}$ the **direction weight**. The projection length of $\mathbf{g}_c$ on each loss-specific gradient serves as an effective "learning rate" for each loss. Here, we choose $\hat{w}_i=\hat{w}_j \ \forall i,j \in [1,m]$ to ensure a uniform decrease rate of all losses, as this was shown to yield a weak form of Pareto optimality for multi-task learning.

Meanwhile, we introduce an adaptive strategy for the length of $\mathbf{g}_c$ rather than directly setting a fixed value of $k$. We notice that the length of $\mathbf{g}_c$ should increase when all loss-specific gradients point nearly in the same direction since it indicates a favorable direction for optimization. Conversely, when loss-specific gradients are close to opposing each other, the magnitude of $\mathbf{g}_c$ should decrease. We realize this by rescaling the length of $\mathbf{g}_c$ to the sum of the projection lengths of each loss-specific gradient on it, i.e., $|\mathbf{g}_c|=\sum_{i=1}^m|\mathbf{g}_i|\mathcal{S}_c(\mathbf{g}_i,\mathbf{g}_c)$.

The procedures above are summarized in the **Con**flict-**F**ree **I**nverse **G**radients (ConFIG) operator $\mathcal{G}$, and we correspondingly denote the final update gradient $\mathbf{g}_c$ as $\mathbf{g}_{\text{ConFIG}}$:

$$
\mathbf{g}_{\text{ConFIG}}=\mathcal{G}(\mathbf{g}_1,\mathbf{g}_2,\cdots,\mathbf{g}_m):=\left(\sum_{i=1}^m \mathbf{g}_i^\top\mathbf{g}_u\right)\mathbf{g}_u,
$$

$$
\mathbf{g}_u = \mathcal{U}\left[
[\mathcal{U}(\mathbf{g}_1),\mathcal{U}(\mathbf{g}_2),\cdots, \mathcal{U}(\mathbf{g}_m)]^{-\top} \mathbf{1}_m\right].
$$

Here, $\mathbf{1}_m$ is a vector of $m$ ones, i.e., equal direction weights. These two equations are implemented as [ConFIG.grad_operator.ConFIG_update()](../../api/grad_operator/#ConFIG.grad_operator.ConFIG_update) and [ConFIG.grad_operator.ConFIGOperator.calculate_gradient()](../../api/grad_operator/#ConFIG.grad_operator.ConFIGOperator.calculate_gradient). We also provide [weight_model](../../api/weight_model/) to let you implement different direction weights ($\hat{\mathbf{w}}=\mathbf{1}_m$ by default) and [length_model](../../api/length_model/) to let you design different length projections (the adaptive strategy above by default). We encourage you to design and try different weight/length models and compare the results with the default configuration.

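For illustration, the two equations above translate almost directly into code. The following is a minimal NumPy sketch of the default setup (unit direction weights plus the adaptive length model); it is a simplified stand-in for what `ConFIG_update()` computes, not the library code itself:

```python
import numpy as np

def unit(x, eps=1e-8):
    """Normalization operator U(g) = g / (|g| + eps)."""
    return x / (np.linalg.norm(x) + eps)

def config_update(grads, eps=1e-8):
    """Illustrative ConFIG operator for a list of 1-D gradient arrays."""
    U = np.stack([unit(g, eps) for g in grads])      # rows are U(g_i), shape (m, n)
    # Conflict-free direction: g_u = U[ [U(g_1), ..., U(g_m)]^{-T} 1_m ]
    g_u = unit(np.linalg.pinv(U) @ np.ones(len(grads)), eps)
    # Adaptive length: sum of the projection lengths of each g_i onto g_u.
    length = sum(float(g @ g_u) for g in grads)
    return length * g_u

grads = [np.array([1.0, 0.0, 0.5]), np.array([-0.4, 1.0, 0.0])]
g_config = config_update(grads)
print([float(g @ g_config) for g in grads])          # both positive: conflict-free
```
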
## ConFIG in the two-loss scenario

For the special case of only two loss terms, there is an equivalent form of ConFIG that does not require a pseudoinverse:

$$
\begin{align}
\mathcal{G}(\mathbf{g}_1,\mathbf{g}_2)=(\mathbf{g}_1^\top\mathbf{g}_{v}+\mathbf{g}_2^\top\mathbf{g}_{v}) \mathbf{g}_{v}
\\
\mathbf{g}_{v}=\mathcal{U}\left[\mathcal{U}(\mathcal{O}(\mathbf{g}_1,\mathbf{g}_2))+\mathcal{U}(\mathcal{O}(\mathbf{g}_2,\mathbf{g}_1))\right]
\end{align}
$$

where $\mathcal{O}(\mathbf{g}_1,\mathbf{g}_2)=\mathbf{g}_2-\frac{\mathbf{g}_1^\top\mathbf{g}_2}{|\mathbf{g}_1|^2}\mathbf{g}_1$ is the orthogonality operator. It returns a vector orthogonal to $\mathbf{g}_1$ from the plane spanned by $\mathbf{g}_{1}$ and $\mathbf{g}_{2}$.

This equivalence is implemented as [ConFIG.grad_operator.ConFIG_update_double()](../../api/grad_operator/#ConFIG.grad_operator.ConFIG_update_double). You can also set `allow_simplified_model` to `True` in [ConFIG.grad_operator.ConFIGOperator](../../api/grad_operator/#ConFIG.grad_operator.ConFIGOperator) to enable this form in the two-loss scenario.

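Here is a minimal NumPy sketch of this two-gradient form (again purely illustrative; `orthogonal_part` is a local helper for $\mathcal{O}$, not a library function):

```python
import numpy as np

def unit(x, eps=1e-8):
    return x / (np.linalg.norm(x) + eps)

def orthogonal_part(g1, g2):
    """O(g1, g2): component of g2 orthogonal to g1."""
    return g2 - (g1 @ g2) / (g1 @ g1) * g1

def config_update_double(g1, g2):
    g_v = unit(unit(orthogonal_part(g1, g2)) + unit(orthogonal_part(g2, g1)))
    return (g1 @ g_v + g2 @ g_v) * g_v

g1, g2 = np.array([1.0, 0.0, 0.5]), np.array([-0.4, 1.0, 0.0])
# For two linearly independent gradients this should agree with the
# pseudoinverse-based operator sketched above, up to numerical precision.
print(config_update_double(g1, g2))
```
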
## M-ConFIG

Gradient-based methods like the proposed ConFIG method require separate backpropagation steps to compute the gradient for each loss term, which could be computationally expensive. To address this issue, we introduce an accelerated momentum-based variant of ConFIG: **M-ConFIG**. Our core idea is to leverage the momentum of the gradient for the ConFIG operation and update momentum variables in an alternating fashion to avoid backpropagating all losses in a single step. In each iteration, only a single momentum is updated with its corresponding gradient, while the others are carried over from previous steps. Algorithm 1 details the entire procedure of M-ConFIG.

![M-ConFIG](../assets/algorithm.png)

The M-ConFIG method is implemented as [ConFIG.momentum_operator.PseudoMomentumOperator](../../api/momentum_operator/#ConFIG.momentum_operator.PseudoMomentumOperator). This momentum strategy is not limited to ConFIG: you can modify its `gradient_operator` parameter to enable momentum acceleration for other gradient-based methods.

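To make the alternating update concrete, here is a schematic PyTorch sketch of the idea described above. It glosses over details of Algorithm 1 (e.g., any bias correction or warm-up of the momentum variables), uses a toy parameter vector and two toy losses as placeholders, and re-implements a simplified `config_update` rather than using the library's operator:

```python
import torch

def config_update(grads, eps=1e-8):
    # Illustrative ConFIG operator on flattened gradient (or momentum) vectors.
    U = torch.stack([g / (g.norm() + eps) for g in grads])
    g_u = torch.linalg.pinv(U) @ torch.ones(len(grads))
    g_u = g_u / (g_u.norm() + eps)
    return sum(g @ g_u for g in grads) * g_u

torch.manual_seed(0)
params = torch.randn(8, requires_grad=True)             # stand-in for model parameters
losses = [lambda p: (p ** 2).sum(),                     # two toy loss terms
          lambda p: ((p - 1.0) ** 2).sum()]
momenta = [torch.zeros_like(params) for _ in losses]    # one momentum per loss
beta, lr = 0.9, 0.05

for step in range(200):
    i = step % len(losses)                              # only one loss is backpropagated
    g_i, = torch.autograd.grad(losses[i](params), params)
    momenta[i] = beta * momenta[i] + (1 - beta) * g_i   # update only that momentum
    g = config_update(momenta)                          # ConFIG on the momentum vectors
    with torch.no_grad():
        params -= lr * g
```
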
---

For a detailed discussion of the background theory, please check our [research paper](https://arxiv.org/abs/2408.11104).