From bdfdd8763b61e64ec4dcc56da52099b5274b6173 Mon Sep 17 00:00:00 2001 From: Robrecht Cannoodt Date: Tue, 2 Jul 2024 09:20:40 +0200 Subject: [PATCH] update to viash 0.9 --- .github/workflows/build.yml | 119 +----------- .github/workflows/test.yml | 106 +---------- _viash.yaml | 34 ++-- nextflow.config | 2 + scripts/script.R | 48 +++++ src/common/create_component/config.vsh.yaml | 4 +- src/common/create_component/script.py | 4 +- .../sync_test_resources/config.vsh.yaml | 4 +- .../ground_truth/config.vsh.yaml | 25 +-- .../mean_across_celltypes/config.vsh.yaml | 25 +-- .../mean_across_compounds/config.vsh.yaml | 4 +- .../mean_outcome/config.vsh.yaml | 25 +-- src/control_methods/sample/config.vsh.yaml | 25 +-- src/control_methods/zeros/config.vsh.yaml | 23 +-- src/methods/jn_ap_op2/config.vsh.yaml | 63 +++--- src/methods/lgc_ensemble/config.vsh.yaml | 95 ++++----- .../lgc_ensemble_direct/config.vsh.yaml | 103 +++++----- .../lgc_ensemble_predict/config.vsh.yaml | 69 +++---- .../lgc_ensemble_prepare/config.vsh.yaml | 115 +++++------ .../lgc_ensemble_train/config.vsh.yaml | 95 ++++----- .../config.vsh.yaml | 75 ++++---- src/methods/pyboost/config.vsh.yaml | 73 +++---- src/methods/scape/config.vsh.yaml | 129 ++++++------- .../transformer_ensemble/config.vsh.yaml | 91 ++++----- .../mean_rowwise_correlation/config.vsh.yaml | 101 +++++----- .../mean_rowwise_error/config.vsh.yaml | 73 +++---- .../add_uns_metadata/config.vsh.yaml | 107 +++++------ src/process_dataset/bootstrap/config.vsh.yaml | 109 +++++------ .../compute_pseudobulk/config.vsh.yaml | 51 ++--- .../convert_h5ad_to_parquet/config.vsh.yaml | 83 ++++---- .../config.vsh.yaml | 147 +++++++------- .../filter_obs/config.vsh.yaml | 51 ++--- .../filter_vars/config.vsh.yaml | 51 ++--- .../generate_id_map/config.vsh.yaml | 51 ++--- src/process_dataset/run_limma/config.vsh.yaml | 95 ++++----- src/workflows/process_dataset/config.vsh.yaml | 88 ++++----- src/workflows/run_benchmark/config.vsh.yaml | 180 +++++++++--------- .../run_stability_analysis/config.vsh.yaml | 168 ++++++++-------- 38 files changed, 1304 insertions(+), 1407 deletions(-) create mode 100644 scripts/script.R diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index cb1323d6..f5bc8988 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -1,120 +1,21 @@ -name: build +name: Build on: push: branches: [ 'main' ] workflow_dispatch: inputs: - target_branch: - description: 'Branch to deploy to. If not specified, `build-${BRANCH_NAME}` will be used.' - required: false version: - description: 'Version name to use for the build. If not specified, `build-${BRANCH_NAME}` will be used.' + description: | + The version of the project to build. Example: `1.0.3`. + + If not provided, a development build with a version name + based on the branch name will be built. Otherwise, a release + build with the provided version will be built. 
required: false jobs: - # phase 1 - list: - runs-on: ubuntu-latest - - outputs: - target_branch: ${{ steps.defaults.outputs.target_branch }} - version: ${{ steps.defaults.outputs.version }} - component_matrix: ${{ steps.set_matrix.outputs.matrix }} - - steps: - - uses: actions/checkout@v4 - - - uses: viash-io/viash-actions/setup@v5 - - - name: Determine version tag from branch name - id: defaults - run: | - BRANCH_NAME=$(echo $GITHUB_REF | sed 's/refs\/heads\///') - - VERSION=${{ github.event.inputs.version }} - if [ -z "$VERSION" ]; then - VERSION="build-$BRANCH_NAME" - fi - echo "version=$VERSION" >> $GITHUB_OUTPUT - - TARGET_BRANCH=${{ github.event.inputs.target_branch }} - if [ -z "$TARGET_BRANCH" ]; then - TARGET_BRANCH="build-$BRANCH_NAME" - fi - echo "target_branch=$TARGET_BRANCH" >> $GITHUB_OUTPUT - - - name: Remove target folder from .gitignore - run: | - # allow publishing the target folder - sed -i '/^target.*/d' .gitignore - - - uses: viash-io/viash-actions/ns-build@v5 - with: - config_mod: .functionality.version := '${{ steps.defaults.outputs.version }}' - parallel: true - - - name: Deploy to target branch - uses: peaceiris/actions-gh-pages@v4 - with: - github_token: ${{ secrets.GITHUB_TOKEN }} - publish_dir: . - publish_branch: ${{ steps.defaults.outputs.target_branch }} - - - id: ns_list - uses: viash-io/viash-actions/ns-list@v5 - with: - platform: docker - src: src - format: json - - - id: set_matrix - run: | - echo "matrix=$(jq -c '[ .[] | - { - "name": (.functionality.namespace + "/" + .functionality.name), - "dir": .info.config | capture("^(?.*\/)").dir - } - ]' ${{ steps.ns_list.outputs.output_file }} )" >> $GITHUB_OUTPUT - - # phase 2 build: - needs: list - - runs-on: ubuntu-latest - - strategy: - fail-fast: false - matrix: - component: ${{ fromJson(needs.list.outputs.component_matrix) }} - - steps: - # Remove unnecessary files to free up space. Otherwise, we get 'no space left on device.' 
- - uses: data-intuitive/reclaim-the-bytes@v2 - - - uses: actions/checkout@v4 - - - uses: viash-io/viash-actions/setup@v5 - - - name: Build container - uses: viash-io/viash-actions/ns-build@v5 - with: - config_mod: .functionality.version := '${{ needs.list.outputs.version }}' - platform: docker - src: ${{ matrix.component.dir }} - setup: build - - - name: Login to container registry - uses: docker/login-action@v3 - with: - registry: ghcr.io - username: ${{ secrets.GTHB_USER }} - password: ${{ secrets.GTHB_PAT }} - - - name: Push container - uses: viash-io/viash-actions/ns-build@v5 - with: - config_mod: .functionality.version := '${{ needs.list.outputs.version }}' - platform: docker - src: ${{ matrix.component.dir }} - setup: push \ No newline at end of file + uses: openproblems-bio/actions/.github/workflows/build.yml@main + with: + version: ${{ github.event.inputs.version }} diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index d1892c8a..87537860 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -1,107 +1,9 @@ -name: test +name: Test on: - pull_request: push: - branches: [ '**' ] + pull_request: jobs: - run_ci_check_job: - runs-on: ubuntu-latest - outputs: - run_ci: ${{ steps.github_cli.outputs.check }} - steps: - - name: 'Check if branch has an existing pull request and the trigger was a push' - id: github_cli - run: | - pull_request=$(gh pr list -R ${{ github.repository }} -H ${{ github.ref_name }} --json url --state open --limit 1 | jq '.[0].url') - # If the branch has a PR and this run was triggered by a push event, do not run - if [[ "$pull_request" != "null" && "$GITHUB_REF_NAME" != "main" && "${{ github.event_name == 'push' }}" == "true" && "${{ !contains(github.event.head_commit.message, 'ci force') }}" == "true" ]]; then - echo "check=false" >> $GITHUB_OUTPUT - else - echo "check=true" >> $GITHUB_OUTPUT - fi - env: - GITHUB_TOKEN: ${{ secrets.GTHB_PAT }} - - # phase 1 - list: - needs: run_ci_check_job - env: - s3_bucket: s3://openproblems-bio/public/neurips-2023-competition/workflow-resources/ - runs-on: ubuntu-latest - if: ${{ needs.run_ci_check_job.outputs.run_ci == 'true' }} - - outputs: - matrix: ${{ steps.set_matrix.outputs.matrix }} - cache_key: ${{ steps.cache.outputs.cache_key }} - - steps: - - uses: actions/checkout@v4 - with: - fetch-depth: 0 - - - uses: viash-io/viash-actions/setup@v5 - - - uses: viash-io/viash-actions/project/sync-and-cache-s3@v5 - id: cache - with: - s3_bucket: $s3_bucket - dest_path: resources - cache_key_prefix: resources__ - - - id: ns_list - uses: viash-io/viash-actions/ns-list@v5 - with: - platform: docker - format: json - - - id: ns_list_filtered - uses: viash-io/viash-actions/project/detect-changed-components@v5 - with: - input_file: "${{ steps.ns_list.outputs.output_file }}" - - - id: set_matrix - run: | - echo "matrix=$(jq -c '[ .[] | - { - "name": (.functionality.namespace + "/" + .functionality.name), - "config": .info.config - } - ]' ${{ steps.ns_list_filtered.outputs.output_file }} )" >> $GITHUB_OUTPUT - - # phase 2 - viash_test: - needs: list - if: ${{ needs.list.outputs.matrix != '[]' && needs.list.outputs.matrix != '' }} - runs-on: ubuntu-latest - - strategy: - fail-fast: false - matrix: - component: ${{ fromJson(needs.list.outputs.matrix) }} - - steps: - # Remove unnecessary files to free up space. Otherwise, we get 'no space left on device.' 
- - uses: data-intuitive/reclaim-the-bytes@v2 - - - uses: actions/checkout@v4 - - - uses: viash-io/viash-actions/setup@v5 - - # use cache - - name: Cache resources data - uses: actions/cache@v4 - timeout-minutes: 10 - with: - path: resources - key: ${{ needs.list.outputs.cache_key }} - - - name: Run test - timeout-minutes: 30 - run: | - VIASH_TEMP=$RUNNER_TEMP/viash viash test \ - "${{ matrix.component.config }}" \ - --cpus 2 \ - --memory "5gb" - + build: + uses: openproblems-bio/actions/.github/workflows/test.yml@main diff --git a/_viash.yaml b/_viash.yaml index 5106f43c..c0d17ca7 100644 --- a/_viash.yaml +++ b/_viash.yaml @@ -1,15 +1,25 @@ -viash_version: 0.8.6 +name: task_perturbation_prediction +version: 1.1.0 -source: src -target: target +# package metadata +description: | + Predicting how small molecules change gene expression in different cell types. +license: MIT +keywords: [single-cell, perturbation prediction, perturbation, openproblems, benchmark] +links: + issue_tracker: https://github.com/openproblems-bio/task_perturbation_prediction/issues + repository: https://github.com/openproblems-bio/task_perturbation_prediction + docker_registry: ghcr.io +# technical settings +organization: openproblems-bio +viash_version: 0.9.0-RC6 +info: + test_resources: + - type: s3 + path: s3://openproblems-data/resources/perturbation_prediction + dest: resources + +# set default labels config_mods: | - .functionality.version := 'dev' - .functionality.arguments[.multiple == true].multiple_sep := ';' - .platforms[.type == 'docker'].target_registry := 'ghcr.io' - .platforms[.type == 'docker'].target_organization := 'openproblems-bio/task_perturbation_prediction' - .platforms[.type == 'docker'].target_image_source := 'https://github.com/openproblems-bio/task_perturbation_prediction' - .platforms[.type == "nextflow"].directives.tag := "$id" - .platforms[.type == "nextflow"].auto.simplifyOutput := false - .platforms[.type == "nextflow"].config.labels := { lowmem : "memory = 20.Gb", midmem : "memory = 50.Gb", highmem : "memory = 100.Gb", lowcpu : "cpus = 5", midcpu : "cpus = 15", highcpu : "cpus = 30", lowtime : "time = 1.h", midtime : "time = 4.h", hightime : "time = 8.h", veryhightime : "time = 24.h" } - .platforms[.type == "nextflow"].config.script := "process.errorStrategy = 'ignore'" \ No newline at end of file + .runners[.type == "nextflow"].config.labels := { lowmem : "memory = 20.Gb", midmem : "memory = 50.Gb", highmem : "memory = 100.Gb", lowcpu : "cpus = 5", midcpu : "cpus = 15", highcpu : "cpus = 30", lowtime : "time = 1.h", midtime : "time = 4.h", hightime : "time = 8.h", veryhightime : "time = 24.h" } \ No newline at end of file diff --git a/nextflow.config b/nextflow.config index 6402ebf2..a3fd6d7f 100644 --- a/nextflow.config +++ b/nextflow.config @@ -1 +1,3 @@ process.container = 'nextflow/bash:latest' + +process.errorStrategy = "ignore" \ No newline at end of file diff --git a/scripts/script.R b/scripts/script.R new file mode 100644 index 00000000..19dd2c26 --- /dev/null +++ b/scripts/script.R @@ -0,0 +1,48 @@ +library(tidyverse) + +# aws s3 sync s3://openproblems-data/resources/perturbation_prediction/results output/benchmark_results + +df <- yaml::read_yaml("output/benchmark_results/kaggle_2024-06-02_22-27-09/score_uns.yaml") %>% + map_dfr(as.data.frame) %>% + as_tibble + +df %>% filter(metric_ids == "mean_rowwise_rmse") %>% arrange(metric_values) %>% select(method_id, metric_values) + +# method_id metric_values +# +# 1 ground_truth 0 +# 2 nn_retraining_with_pseudolabels 1.29 +# 3 
scape                              1.31
#  4 pyboost                            1.32
#  5 jn_ap_op2                          1.34
#  6 lgc_ensemble                       1.41
#  7 mean_across_compounds              1.47
#  8 transformer_ensemble               1.55
#  9 zeros                              1.57
# 10 mean_outcome                       1.57
# 11 mean_across_celltypes              2.50
# 12 sample                             3.02

#######

df <- yaml::read_yaml("output/benchmark_results/run_2024-06-02_22-27-09/score_uns.yaml") %>%
  map_dfr(as.data.frame) %>%
  as_tibble

df %>% filter(metric_ids == "mean_rowwise_rmse") %>% arrange(metric_values) %>% select(method_id, metric_values)

# # A tibble: 12 × 2
#    method_id                       metric_values
#    <chr>                                   <dbl>
#  1 ground_truth                            0
#  2 nn_retraining_with_pseudolabels         0.757
#  3 scape                                   0.775
#  4 pyboost                                 0.795
#  5 lgc_ensemble                            0.802
#  6 mean_across_celltypes                   0.892
#  7 jn_ap_op2                               0.894
#  8 transformer_ensemble                    0.897
#  9 mean_outcome                            0.899
# 10 zeros                                   0.918
# 11 mean_across_compounds                   0.943
# 12 sample                                  1.36
\ No newline at end of file
diff --git a/src/common/create_component/config.vsh.yaml b/src/common/create_component/config.vsh.yaml
index 9a214832..9193f89d 100644
--- a/src/common/create_component/config.vsh.yaml
+++ b/src/common/create_component/config.vsh.yaml
@@ -44,13 +44,15 @@ functionality:
     - type: python_script
       path: script.py
     - path: read_and_merge_yaml.py
-platforms:
+engines:
   - type: docker
     image: python:3.10-slim
     setup:
       - type: python
         pypi: ruamel.yaml
   - type: native
+runners:
+  - type: executable
   - type: nextflow
diff --git a/src/common/create_component/script.py b/src/common/create_component/script.py
index 65aaad9a..f6e6ceee 100644
--- a/src/common/create_component/script.py
+++ b/src/common/create_component/script.py
@@ -60,7 +60,9 @@ def create_config(par, component_type, pretty_name, script_path) -> str:
   |  # This platform allows running the component natively
   |  - type: native
   |  # Allows turning the component into a Nextflow module / pipeline.
-  |  - type: nextflow
+  |runners:
+  |  - type: executable
+  |  - type: nextflow
   |    directives:
   |      label: [midtime,midmem,midcpu]
   |'''
diff --git a/src/common/sync_test_resources/config.vsh.yaml b/src/common/sync_test_resources/config.vsh.yaml
index 017f1dfe..5189fe43 100644
--- a/src/common/sync_test_resources/config.vsh.yaml
+++ b/src/common/sync_test_resources/config.vsh.yaml
@@ -34,8 +34,10 @@ functionality:
   resources:
     - type: bash_script
       path: script.sh
-platforms:
+engines:
   - type: docker
     image: "amazon/aws-cli:2.7.12"
   - type: native
+runners:
+  - type: executable
   - type: nextflow
diff --git a/src/control_methods/ground_truth/config.vsh.yaml b/src/control_methods/ground_truth/config.vsh.yaml
index 2542457b..46b6ebbb 100644
--- a/src/control_methods/ground_truth/config.vsh.yaml
+++ b/src/control_methods/ground_truth/config.vsh.yaml
@@ -1,16 +1,15 @@
 __merge__: ../../api/comp_control_method.yaml
-functionality:
-  name: ground_truth
-  info:
-    label: Ground truth
-    summary: "Returns the ground truth predictions."
-    description: |
-      The identity function that returns the ground-truth information as the output.
-    preferred_normalization: counts
-  resources:
-    - type: r_script
-      path: script.R
+name: ground_truth
+info:
+  label: Ground truth
+  summary: "Returns the ground truth predictions."
+  description: |
+    The identity function that returns the ground-truth information as the output. 
+ preferred_normalization: counts +resources: + - type: r_script + path: script.R +engines: - type: docker image: ghcr.io/openproblems-bio/base_r:1.0.4 setup: @@ -18,6 +17,8 @@ platforms: cran: [ arrow, dplyr ] - type: python packages: [ fastparquet ] +runners: + - type: executable - type: nextflow directives: label: [ midtime, highmem, highcpu ] \ No newline at end of file diff --git a/src/control_methods/mean_across_celltypes/config.vsh.yaml b/src/control_methods/mean_across_celltypes/config.vsh.yaml index a5102739..9ae27c44 100644 --- a/src/control_methods/mean_across_celltypes/config.vsh.yaml +++ b/src/control_methods/mean_across_celltypes/config.vsh.yaml @@ -1,21 +1,22 @@ __merge__: ../../api/comp_control_method.yaml -functionality: - name: mean_across_celltypes - info: - label: Mean per cell type and gene - summary: Baseline method that returns mean of cell type's outcomes - description: | - Baseline method that predicts for a cell type the mean of its outcomes of all compounds. - resources: - - type: python_script - path: script.py - - path: ../../utils/anndata_to_dataframe.py -platforms: +name: mean_across_celltypes +info: + label: Mean per cell type and gene + summary: Baseline method that returns mean of cell type's outcomes + description: | + Baseline method that predicts for a cell type the mean of its outcomes of all compounds. +resources: + - type: python_script + path: script.py + - path: ../../utils/anndata_to_dataframe.py +engines: - type: docker image: ghcr.io/openproblems-bio/base_python:1.0.4 setup: - type: python packages: [ fastparquet ] +runners: + - type: executable - type: nextflow directives: label: [ midtime, highmem, highcpu ] \ No newline at end of file diff --git a/src/control_methods/mean_across_compounds/config.vsh.yaml b/src/control_methods/mean_across_compounds/config.vsh.yaml index 79c6a2cc..bfc71b1c 100644 --- a/src/control_methods/mean_across_compounds/config.vsh.yaml +++ b/src/control_methods/mean_across_compounds/config.vsh.yaml @@ -10,12 +10,14 @@ functionality: - type: python_script path: script.py - path: ../../utils/anndata_to_dataframe.py -platforms: +engines: - type: docker image: ghcr.io/openproblems-bio/base_python:1.0.4 setup: - type: python packages: [ fastparquet ] +runners: + - type: executable - type: nextflow directives: label: [ midtime, highmem, highcpu ] \ No newline at end of file diff --git a/src/control_methods/mean_outcome/config.vsh.yaml b/src/control_methods/mean_outcome/config.vsh.yaml index 1f5a74cc..5f4eff95 100644 --- a/src/control_methods/mean_outcome/config.vsh.yaml +++ b/src/control_methods/mean_outcome/config.vsh.yaml @@ -1,21 +1,22 @@ __merge__: ../../api/comp_control_method.yaml -functionality: - name: mean_outcome - info: - label: Mean per gene - summary: Baseline method that returns mean of gene's outcomes - description: | - Baseline method that predicts for a gene the mean of its outcomes of all samples. - resources: - - type: python_script - path: script.py - - path: ../../utils/anndata_to_dataframe.py -platforms: +name: mean_outcome +info: + label: Mean per gene + summary: Baseline method that returns mean of gene's outcomes + description: | + Baseline method that predicts for a gene the mean of its outcomes of all samples. 
+resources: + - type: python_script + path: script.py + - path: ../../utils/anndata_to_dataframe.py +engines: - type: docker image: ghcr.io/openproblems-bio/base_python:1.0.4 setup: - type: python packages: [ fastparquet ] +runners: + - type: executable - type: nextflow directives: label: [ midtime, highmem, highcpu ] \ No newline at end of file diff --git a/src/control_methods/sample/config.vsh.yaml b/src/control_methods/sample/config.vsh.yaml index fcc5363a..aa580802 100644 --- a/src/control_methods/sample/config.vsh.yaml +++ b/src/control_methods/sample/config.vsh.yaml @@ -1,16 +1,15 @@ __merge__: ../../api/comp_control_method.yaml -functionality: - name: sample - info: - label: Sample - summary: Sample predictions from the training data - description: | - This method samples the training data to generate predictions. - preferred_normalization: counts - resources: - - type: r_script - path: script.R -platforms: +name: sample +info: + label: Sample + summary: Sample predictions from the training data + description: | + This method samples the training data to generate predictions. + preferred_normalization: counts +resources: + - type: r_script + path: script.R +engines: - type: docker image: ghcr.io/openproblems-bio/base_r:1.0.4 setup: @@ -18,6 +17,8 @@ platforms: cran: [ arrow, dplyr ] - type: python packages: [ fastparquet ] +runners: + - type: executable - type: nextflow directives: label: [ midtime, highmem, highcpu ] \ No newline at end of file diff --git a/src/control_methods/zeros/config.vsh.yaml b/src/control_methods/zeros/config.vsh.yaml index 7f5be374..3cdc25c2 100644 --- a/src/control_methods/zeros/config.vsh.yaml +++ b/src/control_methods/zeros/config.vsh.yaml @@ -1,20 +1,21 @@ __merge__: ../../api/comp_control_method.yaml -functionality: - name: zeros - info: - label: Zeros - summary: Baseline method that predicts all zeros - description: | - Baseline method that predicts all zeros. - resources: - - type: python_script - path: script.py -platforms: +name: zeros +info: + label: Zeros + summary: Baseline method that predicts all zeros + description: | + Baseline method that predicts all zeros. +resources: + - type: python_script + path: script.py +engines: - type: docker image: ghcr.io/openproblems-bio/base_python:1.0.4 setup: - type: python packages: [ fastparquet ] +runners: + - type: executable - type: nextflow directives: label: [ midtime, highmem, highcpu ] \ No newline at end of file diff --git a/src/methods/jn_ap_op2/config.vsh.yaml b/src/methods/jn_ap_op2/config.vsh.yaml index 1da6eb5b..35c3b060 100644 --- a/src/methods/jn_ap_op2/config.vsh.yaml +++ b/src/methods/jn_ap_op2/config.vsh.yaml @@ -1,36 +1,35 @@ __merge__: ../../api/comp_method.yaml -functionality: - name: jn_ap_op2 - info: - label: JN-AP-OP2 - neurips2023_rank: 20 - summary: "Deep learning architecture composed of 2 modules: a sample-centric MLP and a gene-centric MLP" - description: | - We first encode each sample using leave-one-out encoder based on compound and cell type. This produces X with the dimension of n_samples, n_genes, n_encode, - where n_encode is 2. Then, X is passed to a MLP1 sample-wise with input of n_samples, n_genes*n_encode, which outputs the same dimension data. - The purpose of this MLP is to learn inter-gene relationships. Then, we group the output of MLP1 with X (original encoded data) and feed it - to MLP2 which receives n_smaples*n_genes, (n_encode + n_encode) and results n_samples*n_genes. This MLP2 trains on each (compound, cell_type, gene) - combination. 
This is to overcome the underdetermination problem due to lack of sufficient (compound, cell_type) samples.
-    documentation_url: https://www.kaggle.com/competitions/open-problems-single-cell-perturbations/discussion/461159
-    repository_url: https://github.com/AntoinePassemiers/Open-Challenges-Single-Cell-Perturbations
-  arguments:
-    - type: integer
-      name: --n_replica
-      default: 10
-      info:
-        test_default: 1
-    - type: string
-      name: --submission_names
-      multiple: true
-      default: [dl40, dl200]
-      info:
-        test_default: [dl40]
-  resources:
-    - type: python_script
-      path: script.py
-    - path: helper.py
+name: jn_ap_op2
+info:
+  label: JN-AP-OP2
+  neurips2023_rank: 20
+  summary: "Deep learning architecture composed of 2 modules: a sample-centric MLP and a gene-centric MLP"
+  description: |
+    We first encode each sample using a leave-one-out encoder based on compound and cell type. This produces X with dimensions n_samples, n_genes, n_encode,
+    where n_encode is 2. Then, X is passed sample-wise to MLP1, which takes an input of n_samples, n_genes*n_encode and outputs data of the same dimensions.
+    The purpose of this MLP is to learn inter-gene relationships. Then, we group the output of MLP1 with X (the original encoded data) and feed it
+    to MLP2, which receives n_samples*n_genes, (n_encode + n_encode) and outputs n_samples*n_genes. This MLP2 trains on each (compound, cell_type, gene)
+    combination. This is to overcome the underdetermination problem due to the lack of sufficient (compound, cell_type) samples.
+  documentation_url: https://www.kaggle.com/competitions/open-problems-single-cell-perturbations/discussion/461159
+  repository_url: https://github.com/AntoinePassemiers/Open-Challenges-Single-Cell-Perturbations
+arguments:
+  - type: integer
+    name: --n_replica
+    default: 10
+    info:
+      test_default: 1
+  - type: string
+    name: --submission_names
+    multiple: true
+    default: [dl40, dl200]
+    info:
+      test_default: [dl40]
+resources:
+  - type: python_script
+    path: script.py
+  - path: helper.py
-platforms:
+engines:
   - type: docker
     image: ghcr.io/openproblems-bio/base_pytorch_nvidia:1.0.4
     setup:
@@ -41,6 +40,8 @@
       - category_encoders
   - type: native
+runners:
+  - type: executable
   - type: nextflow
     directives:
       label: [ hightime, midmem, highcpu, gpu ]
diff --git a/src/methods/lgc_ensemble/config.vsh.yaml b/src/methods/lgc_ensemble/config.vsh.yaml
index b36a8aa4..2190b10b 100644
--- a/src/methods/lgc_ensemble/config.vsh.yaml
+++ b/src/methods/lgc_ensemble/config.vsh.yaml
@@ -1,55 +1,56 @@
 __merge__: ../../api/wf_method.yaml
-functionality:
-  name: lgc_ensemble
-  info:
-    label: LSTM-GRU-CNN Ensemble
-    neurips2023_rank: 1
-    summary: An ensemble of LSTM, GRU, and 1D CNN models
-    description: |
-      An ensemble of LSTM, GRU, and 1D CNN models with a variety of input features derived from ChemBERTa embeddings,
-      one-hot encoding of cell type/small molecule pairs, and various statistical measures of target gene expression.
-      The models were trained with a combination of MSE, MAE, LogCosh, and BCE loss functions to improve their
-      robustness and predictive performance. The approach also included data augmentation techniques to ensure
-      generalization and account for noise in the data. 
- documentation_url: https://www.kaggle.com/competitions/open-problems-single-cell-perturbations/discussion/459258 - repository_url: https://github.com/Jean-KOUAGOU/1st-place-solution-single-cell-pbs/tree/main +name: lgc_ensemble +info: + label: LSTM-GRU-CNN Ensemble + neurips2023_rank: 1 + summary: An ensemble of LSTM, GRU, and 1D CNN models + description: | + An ensemble of LSTM, GRU, and 1D CNN models with a variety of input features derived from ChemBERTa embeddings, + one-hot encoding of cell type/small molecule pairs, and various statistical measures of target gene expression. + The models were trained with a combination of MSE, MAE, LogCosh, and BCE loss functions to improve their + robustness and predictive performance. The approach also included data augmentation techniques to ensure + generalization and account for noise in the data. + documentation_url: https://www.kaggle.com/competitions/open-problems-single-cell-perturbations/discussion/459258 + repository_url: https://github.com/Jean-KOUAGOU/1st-place-solution-single-cell-pbs/tree/main - arguments: - - name: --epochs - type: integer - default: 250 - description: "Number of epochs to train." - info: - test_default: 1 - - name: --kf_n_splits - type: integer - default: 5 - description: "Number of splits for KFold." - info: - test_default: 2 - - name: --schemes - type: string - default: [initial, light, heavy] - multiple: true - info: - test_default: [initial, light] - - name: --models - type: string - default: [LSTM, GRU, Conv] - multiple: true - info: - test_default: [LSTM, GRU] +arguments: + - name: --epochs + type: integer + default: 250 + description: "Number of epochs to train." + info: + test_default: 1 + - name: --kf_n_splits + type: integer + default: 5 + description: "Number of splits for KFold." + info: + test_default: 2 + - name: --schemes + type: string + default: [initial, light, heavy] + multiple: true + info: + test_default: [initial, light] + - name: --models + type: string + default: [LSTM, GRU, Conv] + multiple: true + info: + test_default: [LSTM, GRU] - resources: - - type: nextflow_script - path: main.nf - entrypoint: run_wf +resources: + - type: nextflow_script + path: main.nf + entrypoint: run_wf - dependencies: - - name: methods/lgc_ensemble_prepare - - name: methods/lgc_ensemble_train - - name: methods/lgc_ensemble_predict +dependencies: + - name: methods/lgc_ensemble_prepare + - name: methods/lgc_ensemble_train + - name: methods/lgc_ensemble_predict platforms: +runners: + - type: executable - type: nextflow diff --git a/src/methods/lgc_ensemble_direct/config.vsh.yaml b/src/methods/lgc_ensemble_direct/config.vsh.yaml index 5a6345e4..b5f76229 100644 --- a/src/methods/lgc_ensemble_direct/config.vsh.yaml +++ b/src/methods/lgc_ensemble_direct/config.vsh.yaml @@ -1,59 +1,58 @@ __merge__: ../../api/comp_method.yaml -functionality: - name: lgc_ensemble_direct - info: - label: LSTM-GRU-CNN Ensemble - neurips2023_rank: 1 - summary: An ensemble of LSTM, GRU, and 1D CNN models - description: | - An ensemble of LSTM, GRU, and 1D CNN models with a variety of input features derived from ChemBERTa embeddings, - one-hot encoding of cell type/small molecule pairs, and various statistical measures of target gene expression. - The models were trained with a combination of MSE, MAE, LogCosh, and BCE loss functions to improve their - robustness and predictive performance. The approach also included data augmentation techniques to ensure - generalization and account for noise in the data. 
- documentation_url: https://www.kaggle.com/competitions/open-problems-single-cell-perturbations/discussion/459258 - repository_url: https://github.com/Jean-KOUAGOU/1st-place-solution-single-cell-pbs/tree/main +name: lgc_ensemble_direct +info: + label: LSTM-GRU-CNN Ensemble + neurips2023_rank: 1 + summary: An ensemble of LSTM, GRU, and 1D CNN models + description: | + An ensemble of LSTM, GRU, and 1D CNN models with a variety of input features derived from ChemBERTa embeddings, + one-hot encoding of cell type/small molecule pairs, and various statistical measures of target gene expression. + The models were trained with a combination of MSE, MAE, LogCosh, and BCE loss functions to improve their + robustness and predictive performance. The approach also included data augmentation techniques to ensure + generalization and account for noise in the data. + documentation_url: https://www.kaggle.com/competitions/open-problems-single-cell-perturbations/discussion/459258 + repository_url: https://github.com/Jean-KOUAGOU/1st-place-solution-single-cell-pbs/tree/main - arguments: - - name: --epochs - type: integer - default: 250 - description: "Number of epochs to train." - info: - test_default: 1 - - name: --kf_n_splits - type: integer - default: 5 - description: "Number of splits for KFold." - info: - test_default: 2 - - name: --schemes - type: string - default: [initial, light, heavy] - multiple: true - info: - test_default: [initial, light] - - name: --models - type: string - default: [LSTM, GRU, Conv] - multiple: true - info: - test_default: [LSTM, GRU] +arguments: + - name: --epochs + type: integer + default: 250 + description: "Number of epochs to train." + info: + test_default: 1 + - name: --kf_n_splits + type: integer + default: 5 + description: "Number of splits for KFold." 
+ info: + test_default: 2 + - name: --schemes + type: string + default: [initial, light, heavy] + multiple: true + info: + test_default: [initial, light] + - name: --models + type: string + default: [LSTM, GRU, Conv] + multiple: true + info: + test_default: [LSTM, GRU] - resources: - - type: python_script - path: script.py - - path: ../lgc_ensemble_helpers/helper_classes.py - - path: ../lgc_ensemble_helpers/helper_functions.py - - path: ../lgc_ensemble_helpers/models.py - - path: ../lgc_ensemble_helpers/predict.py - - path: ../lgc_ensemble_helpers/prepare_data.py - - path: ../lgc_ensemble_helpers/train.py - - path: ../lgc_ensemble_helpers/divisor_finder.py - - path: ../../utils/anndata_to_dataframe.py +resources: + - type: python_script + path: script.py + - path: ../lgc_ensemble_helpers/helper_classes.py + - path: ../lgc_ensemble_helpers/helper_functions.py + - path: ../lgc_ensemble_helpers/models.py + - path: ../lgc_ensemble_helpers/predict.py + - path: ../lgc_ensemble_helpers/prepare_data.py + - path: ../lgc_ensemble_helpers/train.py + - path: ../lgc_ensemble_helpers/divisor_finder.py + - path: ../../utils/anndata_to_dataframe.py -platforms: +engines: - type: docker image: ghcr.io/openproblems-bio/base_pytorch_nvidia:1.0.4 run_args: ["--shm-size=2g"] @@ -75,6 +74,8 @@ platforms: - torch-summary - type: native +runners: + - type: executable - type: nextflow directives: label: [hightime, veryhighmem, highcpu, highsharedmem, highgpu] diff --git a/src/methods/lgc_ensemble_predict/config.vsh.yaml b/src/methods/lgc_ensemble_predict/config.vsh.yaml index cf2f3393..904057ab 100644 --- a/src/methods/lgc_ensemble_predict/config.vsh.yaml +++ b/src/methods/lgc_ensemble_predict/config.vsh.yaml @@ -1,38 +1,37 @@ -functionality: - name: lgc_ensemble_predict - namespace: methods - arguments: - - name: --train_data_aug_dir - type: file - required: true - direction: input - - name: --model_files - type: file - required: true - direction: input - example: model.pt - multiple: true - - name: --id_map - type: file - required: true - direction: input - - name: --output - type: file - required: true - direction: output - resources: - - type: python_script - path: script.py - - path: ../lgc_ensemble_helpers/helper_classes.py - - path: ../lgc_ensemble_helpers/helper_functions.py - - path: ../lgc_ensemble_helpers/models.py - - path: ../lgc_ensemble_helpers/predict.py - - path: ../lgc_ensemble_helpers/prepare_data.py - - path: ../lgc_ensemble_helpers/train.py - - path: ../lgc_ensemble_helpers/divisor_finder.py - - path: ../../utils/anndata_to_dataframe.py +name: lgc_ensemble_predict +namespace: methods +arguments: + - name: --train_data_aug_dir + type: file + required: true + direction: input + - name: --model_files + type: file + required: true + direction: input + example: model.pt + multiple: true + - name: --id_map + type: file + required: true + direction: input + - name: --output + type: file + required: true + direction: output +resources: + - type: python_script + path: script.py + - path: ../lgc_ensemble_helpers/helper_classes.py + - path: ../lgc_ensemble_helpers/helper_functions.py + - path: ../lgc_ensemble_helpers/models.py + - path: ../lgc_ensemble_helpers/predict.py + - path: ../lgc_ensemble_helpers/prepare_data.py + - path: ../lgc_ensemble_helpers/train.py + - path: ../lgc_ensemble_helpers/divisor_finder.py + - path: ../../utils/anndata_to_dataframe.py -platforms: +engines: - type: docker image: ghcr.io/openproblems-bio/base_pytorch_nvidia:1.0.4 run_args: ["--shm-size=2g"] @@ -54,6 +53,8 @@ 
platforms: - torch-summary - type: native +runners: + - type: executable - type: nextflow directives: label: [hightime, veryhighmem, highcpu, highsharedmem, gpu] diff --git a/src/methods/lgc_ensemble_prepare/config.vsh.yaml b/src/methods/lgc_ensemble_prepare/config.vsh.yaml index 141beda3..4c6e2b27 100644 --- a/src/methods/lgc_ensemble_prepare/config.vsh.yaml +++ b/src/methods/lgc_ensemble_prepare/config.vsh.yaml @@ -1,61 +1,60 @@ -functionality: - name: lgc_ensemble_prepare - namespace: methods - arguments: - - name: --de_train_h5ad - type: file - required: false - direction: input - - name: --id_map - type: file - required: true - direction: input - - name: --layer - type: string - direction: input - default: clipped_sign_log10_pval - description: Which layer to use for prediction. - - name: --train_data_aug_dir - type: file - required: true - direction: output - - name: --epochs - type: integer - default: 250 - description: "Number of epochs to train." - info: - test_default: 1 - - name: --kf_n_splits - type: integer - default: 5 - description: "Number of splits for KFold." - info: - test_default: 2 - - name: --schemes - type: string - default: [initial, light, heavy] - multiple: true - info: - test_default: [initial, light] - - name: --models - type: string - default: [LSTM, GRU, Conv] - multiple: true - info: - test_default: [LSTM, GRU] - resources: - - type: python_script - path: script.py - - path: ../lgc_ensemble_helpers/helper_classes.py - - path: ../lgc_ensemble_helpers/helper_functions.py - - path: ../lgc_ensemble_helpers/models.py - - path: ../lgc_ensemble_helpers/predict.py - - path: ../lgc_ensemble_helpers/prepare_data.py - - path: ../lgc_ensemble_helpers/train.py - - path: ../lgc_ensemble_helpers/divisor_finder.py - - path: ../../utils/anndata_to_dataframe.py +name: lgc_ensemble_prepare +namespace: methods +arguments: + - name: --de_train_h5ad + type: file + required: false + direction: input + - name: --id_map + type: file + required: true + direction: input + - name: --layer + type: string + direction: input + default: clipped_sign_log10_pval + description: Which layer to use for prediction. + - name: --train_data_aug_dir + type: file + required: true + direction: output + - name: --epochs + type: integer + default: 250 + description: "Number of epochs to train." + info: + test_default: 1 + - name: --kf_n_splits + type: integer + default: 5 + description: "Number of splits for KFold." 
+ info: + test_default: 2 + - name: --schemes + type: string + default: [initial, light, heavy] + multiple: true + info: + test_default: [initial, light] + - name: --models + type: string + default: [LSTM, GRU, Conv] + multiple: true + info: + test_default: [LSTM, GRU] +resources: + - type: python_script + path: script.py + - path: ../lgc_ensemble_helpers/helper_classes.py + - path: ../lgc_ensemble_helpers/helper_functions.py + - path: ../lgc_ensemble_helpers/models.py + - path: ../lgc_ensemble_helpers/predict.py + - path: ../lgc_ensemble_helpers/prepare_data.py + - path: ../lgc_ensemble_helpers/train.py + - path: ../lgc_ensemble_helpers/divisor_finder.py + - path: ../../utils/anndata_to_dataframe.py -platforms: +engines: - type: docker image: ghcr.io/openproblems-bio/base_pytorch_nvidia:1.0.4 run_args: ["--shm-size=2g"] @@ -77,6 +76,8 @@ platforms: - torch-summary - type: native +runners: + - type: executable - type: nextflow directives: label: [hightime, veryhighmem, highcpu] diff --git a/src/methods/lgc_ensemble_train/config.vsh.yaml b/src/methods/lgc_ensemble_train/config.vsh.yaml index bb64ac65..2486ba3c 100644 --- a/src/methods/lgc_ensemble_train/config.vsh.yaml +++ b/src/methods/lgc_ensemble_train/config.vsh.yaml @@ -1,51 +1,50 @@ -functionality: - name: lgc_ensemble_train - namespace: methods - arguments: - - name: --train_data_aug_dir - type: file - required: true - direction: input - - name: --scheme - type: string - required: true - direction: input - description: "The scheme to use for training." - choices: [initial, light, heavy] - - name: --model - type: string - required: true - direction: input - description: "The name of the model to train." - choices: [LSTM, GRU, Conv] - - name: --fold - type: integer - required: true - direction: input - description: "The fold to train." - - name: --model_file - type: file - required: true - direction: output - example: model.pt - - name: --log_file - type: file - required: true - direction: output - example: log.json - resources: - - type: python_script - path: script.py - - path: ../lgc_ensemble_helpers/helper_classes.py - - path: ../lgc_ensemble_helpers/helper_functions.py - - path: ../lgc_ensemble_helpers/models.py - - path: ../lgc_ensemble_helpers/predict.py - - path: ../lgc_ensemble_helpers/prepare_data.py - - path: ../lgc_ensemble_helpers/train.py - - path: ../lgc_ensemble_helpers/divisor_finder.py - - path: ../../utils/anndata_to_dataframe.py +name: lgc_ensemble_train +namespace: methods +arguments: + - name: --train_data_aug_dir + type: file + required: true + direction: input + - name: --scheme + type: string + required: true + direction: input + description: "The scheme to use for training." + choices: [initial, light, heavy] + - name: --model + type: string + required: true + direction: input + description: "The name of the model to train." + choices: [LSTM, GRU, Conv] + - name: --fold + type: integer + required: true + direction: input + description: "The fold to train." 
+ - name: --model_file + type: file + required: true + direction: output + example: model.pt + - name: --log_file + type: file + required: true + direction: output + example: log.json +resources: + - type: python_script + path: script.py + - path: ../lgc_ensemble_helpers/helper_classes.py + - path: ../lgc_ensemble_helpers/helper_functions.py + - path: ../lgc_ensemble_helpers/models.py + - path: ../lgc_ensemble_helpers/predict.py + - path: ../lgc_ensemble_helpers/prepare_data.py + - path: ../lgc_ensemble_helpers/train.py + - path: ../lgc_ensemble_helpers/divisor_finder.py + - path: ../../utils/anndata_to_dataframe.py -platforms: +engines: - type: docker image: ghcr.io/openproblems-bio/base_pytorch_nvidia:1.0.4 run_args: ["--shm-size=2g"] @@ -67,6 +66,8 @@ platforms: - torch-summary - type: native +runners: + - type: executable - type: nextflow directives: label: [hightime, veryhighmem, highcpu, highsharedmem, gpu] diff --git a/src/methods/nn_retraining_with_pseudolabels/config.vsh.yaml b/src/methods/nn_retraining_with_pseudolabels/config.vsh.yaml index 70c4a341..461ba922 100644 --- a/src/methods/nn_retraining_with_pseudolabels/config.vsh.yaml +++ b/src/methods/nn_retraining_with_pseudolabels/config.vsh.yaml @@ -1,45 +1,44 @@ __merge__: ../../api/comp_method.yaml -functionality: - name: nn_retraining_with_pseudolabels - info: - label: NN retraining with pseudolabels - neurips2023_rank: 3 - summary: Neural networks with pseudolabeling and ensemble modelling - description: | - The prediction system is two staged, so I publish two versions of the notebook. - The first stage predicts pseudolabels. To be honest, if I stopped on this version, I would not be the third. - The predicted pseudolabels on all test data (255 rows) are added to training in the second stage. - - **Stage 1 preparing pseudolabels**: The main part of this system is a neural network. Every neural network and its environment was optimized by optuna. Hyperparameters that have been optimized: - a dropout value, a number of neurons in particular layers, an output dimension of an embedding layer, a number of epochs, a learning rate, a batch size, a number of dimension of truncated singular value decomposition. - The optimization was done on custom 4-folds cross validation. In order to avoid overfitting to cross validation by optuna I applied 2 repeats for every fold and took an average. Generally, the more, the better. The optuna's criterion was MRRMSE. - Finally, 7 models were ensembled. Optuna was applied again to determine best weights of linear combination. The prediction of test set is the pseudolabels now and will be used in second stage. - - **Stage 2 retraining with pseudolabels**: The pseudolabels (255 rows) were added to the training dataset. I applied 20 models with optimized parameters in different experiments for a model diversity. - Optuna selected optimal weights for the linear combination of the prediction again. - Models had high variance, so every model was trained 10 times on all dataset and the median of prediction is taken as a final prediction. The prediction was additionally clipped to colwise min and max. 
-    reference: null
-    documentation_url: https://www.kaggle.com/competitions/open-problems-single-cell-perturbations/discussion/458750
-    repository_url: https://github.com/okon2000/single_cell_perturbations
+name: nn_retraining_with_pseudolabels
+info:
+  label: NN retraining with pseudolabels
+  neurips2023_rank: 3
+  summary: Neural networks with pseudolabeling and ensemble modelling
+  description: |
+    The prediction system is two-staged, so I publish two versions of the notebook.
+    The first stage predicts pseudolabels. To be honest, if I had stopped at this version, I would not have placed third.
+    The pseudolabels predicted for all test data (255 rows) are added to the training set in the second stage.
+
+    **Stage 1 (preparing pseudolabels)**: The main part of this system is a neural network. Every neural network and its environment was optimized by Optuna. The hyperparameters that were optimized:
+    the dropout value, the number of neurons in particular layers, the output dimension of the embedding layer, the number of epochs, the learning rate, the batch size, and the number of dimensions of the truncated singular value decomposition.
+    The optimization was done on a custom 4-fold cross-validation. To avoid Optuna overfitting to the cross-validation, I applied 2 repeats for every fold and took the average. Generally, the more, the better. Optuna's criterion was MRRMSE.
+    Finally, 7 models were ensembled, and Optuna was applied again to determine the best weights for their linear combination. The predictions on the test set are now the pseudolabels and will be used in the second stage.
+
+    **Stage 2 (retraining with pseudolabels)**: The pseudolabels (255 rows) were added to the training dataset. I applied 20 models with parameters optimized in different experiments for model diversity.
+    Optuna again selected optimal weights for the linear combination of the predictions.
+    The models had high variance, so every model was trained 10 times on the full dataset, and the median prediction is taken as the final prediction. The prediction was additionally clipped to the column-wise min and max.
+  reference: null
+  documentation_url: https://www.kaggle.com/competitions/open-problems-single-cell-perturbations/discussion/458750
+  repository_url: https://github.com/okon2000/single_cell_perturbations

-  arguments:
-    - type: integer
-      name: --reps
-      default: 10
-      description: "Number of repetitions to train the model."
-      info:
-        # use only 1 rep during unit tests
-        test_default: 1
+arguments:
+  - type: integer
+    name: --reps
+    default: 10
+    description: "Number of repetitions to train the model." 
+ info: + # use only 1 rep during unit tests + test_default: 1 - resources: - - type: python_script - path: script.py - - path: notebook_264.py - - path: notebook_266.py - - path: ../../utils/anndata_to_dataframe.py +resources: + - type: python_script + path: script.py + - path: notebook_264.py + - path: notebook_266.py + - path: ../../utils/anndata_to_dataframe.py -platforms: +engines: - type: docker image: nvcr.io/nvidia/tensorflow:24.03-tf2-py3 setup: @@ -59,6 +58,8 @@ platforms: - fastparquet - type: native +runners: + - type: executable - type: nextflow directives: label: [ hightime, midmem, highcpu, gpu, midsharedmem ] diff --git a/src/methods/pyboost/config.vsh.yaml b/src/methods/pyboost/config.vsh.yaml index 85432847..a6556c5e 100644 --- a/src/methods/pyboost/config.vsh.yaml +++ b/src/methods/pyboost/config.vsh.yaml @@ -1,43 +1,42 @@ __merge__: ../../api/comp_method.yaml -functionality: - name: pyboost - info: - label: Py-boost - neurips2023_rank: 18 - summary: "Py-boost predicting t-scores" - description: | - An ensemble of four models was considered: - - * Py-boost (a ridge regression-based recommender system) - * ExtraTrees (a decision tree ensemble with target-encoded features) - * a k-nearest neighbors recommender system - * a ridge regression model +name: pyboost +info: + label: Py-boost + neurips2023_rank: 18 + summary: "Py-boost predicting t-scores" + description: | + An ensemble of four models was considered: + + * Py-boost (a ridge regression-based recommender system) + * ExtraTrees (a decision tree ensemble with target-encoded features) + * a k-nearest neighbors recommender system + * a ridge regression model - Each model offered distinct strengths and weaknesses: ExtraTrees and - knn were unable to extrapolate beyond the training data, while ridge - regression provided extrapolation capability. To enhance model performance, - data augmentation techniques were used, including averaging differential - expressions for compound mixtures and adjusting cell counts to reduce biases. + Each model offered distinct strengths and weaknesses: ExtraTrees and + knn were unable to extrapolate beyond the training data, while ridge + regression provided extrapolation capability. To enhance model performance, + data augmentation techniques were used, including averaging differential + expressions for compound mixtures and adjusting cell counts to reduce biases. - In the end, only the py-boost model is used for generating predictions. - documentation_url: https://www.kaggle.com/competitions/open-problems-single-cell-perturbations/discussion/458661 - repository_url: https://github.com/Ambros-M/Single-Cell-Perturbations-2023 - arguments: - - type: string - name: --predictor_names - multiple: true - choices: [py_boost, ridge_recommender, knn_recommender, predict_extratrees] - default: [py_boost] - description: Which predictor(s) to use. - info: - test_default: [knn_recommender] - resources: - - type: python_script - path: script.py - - path: helper.py - - path: ../../utils/anndata_to_dataframe.py -platforms: + In the end, only the py-boost model is used for generating predictions. + documentation_url: https://www.kaggle.com/competitions/open-problems-single-cell-perturbations/discussion/458661 + repository_url: https://github.com/Ambros-M/Single-Cell-Perturbations-2023 +arguments: + - type: string + name: --predictor_names + multiple: true + choices: [py_boost, ridge_recommender, knn_recommender, predict_extratrees] + default: [py_boost] + description: Which predictor(s) to use. 
+    info:
+      test_default: [knn_recommender]
+resources:
+  - type: python_script
+    path: script.py
+  - path: helper.py
+  - path: ../../utils/anndata_to_dataframe.py
-platforms:
+engines:
  - type: docker
    image: ghcr.io/openproblems-bio/base_pytorch_nvidia:1.0.4
    setup:
@@ -46,6 +45,8 @@ platforms:
      - colorama
      - py-boost==0.4.3
  - type: native
+runners:
+  - type: executable
  - type: nextflow
    directives:
      label: [midtime,midmem,midcpu,gpu]
\ No newline at end of file
diff --git a/src/methods/scape/config.vsh.yaml b/src/methods/scape/config.vsh.yaml
index fea2b014..9f774699 100644
--- a/src/methods/scape/config.vsh.yaml
+++ b/src/methods/scape/config.vsh.yaml
@@ -1,68 +1,67 @@
 __merge__: ../../api/comp_method.yaml
-functionality:
-  name: scape
-  info:
-    label: ScAPE
-    neurips2023_rank: 16
-    summary: Neural network model for drug effect prediction
-    description: |
-      ScAPE is utilises a neural network (NN) model to estimate drug effects on gene expression in
-      peripheral blood mononuclear cells (PBMCs). The model took drug and cell features as input,
-      with these features primarily derived from the median of signed log-pvalues and log fold-changes
-      grouped by drug and cell type. The NN was trained using a leave-one-drug-out cross-validation
-      strategy, focusing on NK cells as a representative cell type due to their similarity to B cells
-      and Myeloid cells in principal component analysis. Model performance was evaluated by comparing
-      its predictions against two baselines: predicting zero effect and predicting the median
-      log-pvalue for each drug. The final submission combined predictions from models trained on
-      different gene and drug subsets, aiming to enhance overall prediction accuracy.
-    reference: pablormier2023scape
-    documentation_url: https://docs.google.com/document/d/1w0GIJ8VoQx3HEJNmLXoU-Y_STB-h5-bXusL80_6EVuU/edit
-    repository_url: https://github.com/scapeML/scape
-  arguments:
-    - type: string
-      name: --cell
-      description: Pre-defined cell type held for pre-training.
-      required: false
-      default: NK cells
-    - type: integer
-      name: --epochs
-      description: Number of epochs for coarse training.
-      default: 300
-      info:
-        test_default: 2
-    - type: integer
-      name: --epochs_enhanced
-      description: Number of epochs for enhanced training.
-      default: 800
-      info:
-        test_default: 2
-    - type: integer
-      name: --n_genes
-      description: The number of genes for coarse training.
-      default: 64
-      info:
-        test_default: 10
-    - type: integer
-      name: --n_genes_enhanced
-      description: The number of genes for enhanced training.
-      default: 256
-      info:
-        test_default: 10
-    - type: integer
-      name: --n_drugs
-      description: The number of drugs to consider for coarse training. If none, all drugs are considered.
-      info:
-        test_default: 5
-    - type: integer
-      name: --min_n_top_drugs
-      description: The minimum number of top drugs to consider.
-      default: 50
-      info:
-        test_default: 0
-  resources:
-    - type: python_script
-      path: script.py
+name: scape
+info:
+  label: ScAPE
+  neurips2023_rank: 16
+  summary: Neural network model for drug effect prediction
+  description: |
+    ScAPE utilises a neural network (NN) model to estimate drug effects on gene expression in
+    peripheral blood mononuclear cells (PBMCs). The model took drug and cell features as input,
+    with these features primarily derived from the median of signed log-pvalues and log fold-changes
+    grouped by drug and cell type. 
The NN was trained using a leave-one-drug-out cross-validation + strategy, focusing on NK cells as a representative cell type due to their similarity to B cells + and Myeloid cells in principal component analysis. Model performance was evaluated by comparing + its predictions against two baselines: predicting zero effect and predicting the median + log-pvalue for each drug. The final submission combined predictions from models trained on + different gene and drug subsets, aiming to enhance overall prediction accuracy. + reference: pablormier2023scape + documentation_url: https://docs.google.com/document/d/1w0GIJ8VoQx3HEJNmLXoU-Y_STB-h5-bXusL80_6EVuU/edit + repository_url: https://github.com/scapeML/scape +arguments: + - type: string + name: --cell + description: Pre-defined cell type held for pre-training. + required: false + default: NK cells + - type: integer + name: --epochs + description: Number of epochs for coarse training. + default: 300 + info: + test_default: 2 + - type: integer + name: --epochs_enhanced + description: Number of epochs for enhanced training. + default: 800 + info: + test_default: 2 + - type: integer + name: --n_genes + description: The number of genes for coarse training. + default: 64 + info: + test_default: 10 + - type: integer + name: --n_genes_enhanced + description: The number of genes for enhanced training. + default: 256 + info: + test_default: 10 + - type: integer + name: --n_drugs + description: The number of drugs to consider for coarse training. If none, all drugs are considered. + info: + test_default: 5 + - type: integer + name: --min_n_top_drugs + description: The minimum number of top drugs to consider. + default: 50 + info: + test_default: 0 +resources: + - type: python_script + path: script.py +engines: - type: docker image: nvcr.io/nvidia/tensorflow:24.03-tf2-py3 setup: @@ -83,6 +82,8 @@ platforms: - scikit-learn~=1.2.2 - fastparquet~=2023.10.1 - git+https://github.com/scapeML/scape.git +runners: + - type: executable - type: nextflow directives: label: [ hightime, highmem, highcpu, gpu, midsharedmem ] \ No newline at end of file diff --git a/src/methods/transformer_ensemble/config.vsh.yaml b/src/methods/transformer_ensemble/config.vsh.yaml index 057f06fb..7d529f01 100644 --- a/src/methods/transformer_ensemble/config.vsh.yaml +++ b/src/methods/transformer_ensemble/config.vsh.yaml @@ -1,50 +1,49 @@ __merge__: ../../api/comp_method.yaml -functionality: - name: transformer_ensemble - info: - label: Transformer Ensemble - neurips2023_rank: 2 - summary: An ensemble of four transformer models, trained on diverse feature sets, with a cluster-based sampling strategy and robust validation for optimal performance. - description: | - This method employs an ensemble of four transformer models, - each with different weights and trained on slightly varying feature sets. - The feature engineering process involved one-hot encoding of categorical labels, - target encoding using mean and standard deviation, and enriching the feature set - with the standard deviation of target variables. Additionally, the dataset was - carefully examined to ensure data cleanliness. A sophisticated sampling strategy - based on K-Means clustering was employed to partition the data into training and - validation sets, ensuring a representative distribution. The model architecture - leveraged sparse and dense feature encoding, along with a transformer for effective - learning. 
- documentation_url: https://www.kaggle.com/competitions/open-problems-single-cell-perturbations/discussion/458738 - repository_url: https://github.com/Eliorkalfon/single_cell_pb - arguments: - - name: --num_train_epochs - type: integer - default: 20000 - description: "Number of training epochs." - info: - test_default: 10 - - name: --d_model - type: integer - default: 128 - description: "Dimensionality of the model." - - name: --batch_size - type: integer - default: 32 - description: "Batch size." - - name: --early_stopping - type: integer - default: 5000 - description: "Number of epochs to wait for early stopping." - resources: - - type: python_script - path: script.py - - path: models.py - - path: utils.py - - path: train.py -platforms: +name: transformer_ensemble +info: + label: Transformer Ensemble + neurips2023_rank: 2 + summary: An ensemble of four transformer models, trained on diverse feature sets, with a cluster-based sampling strategy and robust validation for optimal performance. + description: | + This method employs an ensemble of four transformer models, + each with different weights and trained on slightly varying feature sets. + The feature engineering process involved one-hot encoding of categorical labels, + target encoding using mean and standard deviation, and enriching the feature set + with the standard deviation of target variables. Additionally, the dataset was + carefully examined to ensure data cleanliness. A sophisticated sampling strategy + based on K-Means clustering was employed to partition the data into training and + validation sets, ensuring a representative distribution. The model architecture + leveraged sparse and dense feature encoding, along with a transformer for effective + learning. + documentation_url: https://www.kaggle.com/competitions/open-problems-single-cell-perturbations/discussion/458738 + repository_url: https://github.com/Eliorkalfon/single_cell_pb +arguments: + - name: --num_train_epochs + type: integer + default: 20000 + description: "Number of training epochs." + info: + test_default: 10 + - name: --d_model + type: integer + default: 128 + description: "Dimensionality of the model." + - name: --batch_size + type: integer + default: 32 + description: "Batch size." + - name: --early_stopping + type: integer + default: 5000 + description: "Number of epochs to wait for early stopping." +resources: + - type: python_script + path: script.py + - path: models.py + - path: utils.py + - path: train.py +engines: - type: docker image: ghcr.io/openproblems-bio/base_pytorch_nvidia:1.0.4 setup: @@ -59,6 +58,8 @@ platforms: - matplotlib~=3.5.0 - PyYAML~=6.0.1 - lion-pytorch +runners: + - type: executable - type: nextflow directives: label: [ midtime, veryhighmem, highcpu, gpu ] diff --git a/src/metrics/mean_rowwise_correlation/config.vsh.yaml b/src/metrics/mean_rowwise_correlation/config.vsh.yaml index 19fcbb05..926d39a1 100644 --- a/src/metrics/mean_rowwise_correlation/config.vsh.yaml +++ b/src/metrics/mean_rowwise_correlation/config.vsh.yaml @@ -1,65 +1,66 @@ __merge__: ../../api/comp_metric.yaml -functionality: - name: mean_rowwise_correlation - info: - metrics: - - name: mean_rowwise_pearson - label: Mean Rowwise Pearson - summary: The mean of Pearson correlations per row (perturbation). - description: | - The **Mean Pearson Correlation** is computed as follows: +name: mean_rowwise_correlation +info: + metrics: + - name: mean_rowwise_pearson + label: Mean Rowwise Pearson + summary: The mean of Pearson correlations per row (perturbation). 
+      description: |
+        The **Mean Pearson Correlation** is computed as follows:

-          $$
-          \textrm{Mean-Pearson} = \frac{1}{R}\sum_{i=1}^R\frac{\textrm{Cov}(\mathbf{y}_i, \mathbf{\hat{y}}_i)}{\textrm{Var}(\mathbf{y}_i) \cdot \textrm{Var}(\mathbf{\hat{y}}_i)}
-          $$
+        $$
+        \textrm{Mean-Pearson} = \frac{1}{R}\sum_{i=1}^R\frac{\textrm{Cov}(\mathbf{y}_i, \mathbf{\hat{y}}_i)}{\sqrt{\textrm{Var}(\mathbf{y}_i) \cdot \textrm{Var}(\mathbf{\hat{y}}_i)}}
+        $$

-          where $(R)$ is the number of scored rows, and $(\mathbf{y}_i)$ and $(\mathbf{\hat{y}}_i)$ are the actual and predicted values, respectively, for row $(i)$.
-        repository_url: null
-        documentation_url: null
-        min: -1
-        max: 1
-        maximize: true
-      - name: mean_rowwise_spearman
-        label: Mean Rowwise Spearman
-        summary: The mean of Spearman correlations per row (perturbation).
-        description: |
-          The **Mean Spearman Correlation** is computed as follows:
+        where $(R)$ is the number of scored rows, and $(\mathbf{y}_i)$ and $(\mathbf{\hat{y}}_i)$ are the actual and predicted values, respectively, for row $(i)$.
+      repository_url: null
+      documentation_url: null
+      min: -1
+      max: 1
+      maximize: true
+    - name: mean_rowwise_spearman
+      label: Mean Rowwise Spearman
+      summary: The mean of Spearman correlations per row (perturbation).
+      description: |
+        The **Mean Spearman Correlation** is computed as follows:

-          $$
-          \textrm{Mean-Pearson} = \frac{1}{R}\sum_{i=1}^R\frac{\textrm{Cov}(\mathbf{r}_i, \mathbf{\hat{r}}_i)}{\textrm{Var}(\mathbf{r}_i) \cdot \textrm{Var}(\mathbf{\hat{r}}_i)}
-          $$
+        $$
+        \textrm{Mean-Spearman} = \frac{1}{R}\sum_{i=1}^R\frac{\textrm{Cov}(\mathbf{r}_i, \mathbf{\hat{r}}_i)}{\sqrt{\textrm{Var}(\mathbf{r}_i) \cdot \textrm{Var}(\mathbf{\hat{r}}_i)}}
+        $$

-          where $(R)$ is the number of scored rows, and $(\mathbf{r}_i)$ and $(\mathbf{\hat{r}}_i)$ are the ranks of the actual and predicted values, respectively, for row $(i)$.
-        repository_url: null
-        documentation_url: null
-        min: -1
-        max: 1
-        maximize: true
-      - name: mean_rowwise_cosine
-        label: Mean Rowwise Cosine
-        summary: The mean of cosine similarities per row (perturbation).
-        description: |
-          The **Mean Cosine Similarity** is computed as follows:
+        where $(R)$ is the number of scored rows, and $(\mathbf{r}_i)$ and $(\mathbf{\hat{r}}_i)$ are the ranks of the actual and predicted values, respectively, for row $(i)$.
+      repository_url: null
+      documentation_url: null
+      min: -1
+      max: 1
+      maximize: true
+    - name: mean_rowwise_cosine
+      label: Mean Rowwise Cosine
+      summary: The mean of cosine similarities per row (perturbation).
+      description: |
+        The **Mean Cosine Similarity** is computed as follows:

-          $$
-          \textrm{Mean-Cosine} = \frac{1}{R}\sum_{i=1}^R\frac{\mathbf{y}_i\cdot \mathbf{\hat{y}}_i}{\|\mathbf{y}_i\| \|\mathbf{\hat{y}}_i\|}
-          $$
+        $$
+        \textrm{Mean-Cosine} = \frac{1}{R}\sum_{i=1}^R\frac{\mathbf{y}_i\cdot \mathbf{\hat{y}}_i}{\|\mathbf{y}_i\| \|\mathbf{\hat{y}}_i\|}
+        $$

-          where $(R)$ is the number of scored rows, and $(\mathbf{y}_i)$ and $(\mathbf{\hat{y}}_i)$ are the actual and predicted values, respectively, for row $(i)$.
-        repository_url: null
-        documentation_url: null
-        min: -1
-        max: 1
-        maximize: true
-      resources:
-        - type: r_script
-          path: script.R
-platforms:
+        where $(R)$ is the number of scored rows, and $(\mathbf{y}_i)$ and $(\mathbf{\hat{y}}_i)$ are the actual and predicted values, respectively, for row $(i)$.
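The scoring component itself ships as an R script (script.R, using proxyC), but the three formulas above are straightforward to sanity-check in Python. A minimal sketch, assuming `y` and `y_hat` are `(R, n)` NumPy arrays of observed and predicted values; the function name is hypothetical:

```python
# Illustrative reimplementation of the three row-wise metrics defined above.
import numpy as np
from scipy.stats import pearsonr, spearmanr

def mean_rowwise_correlations(y, y_hat):
    # Correlation per row (perturbation), then averaged over rows.
    pearson = np.mean([pearsonr(a, b)[0] for a, b in zip(y, y_hat)])
    spearman = np.mean([spearmanr(a, b)[0] for a, b in zip(y, y_hat)])
    # Cosine similarity per row, then averaged over rows.
    cosine = np.mean(np.sum(y * y_hat, axis=1) /
                     (np.linalg.norm(y, axis=1) * np.linalg.norm(y_hat, axis=1)))
    return {"mean_rowwise_pearson": pearson,
            "mean_rowwise_spearman": spearman,
            "mean_rowwise_cosine": cosine}
```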
+      repository_url: null
+      documentation_url: null
+      min: -1
+      max: 1
+      maximize: true
+resources:
+  - type: r_script
+    path: script.R
+engines:
   - type: docker
     image: ghcr.io/openproblems-bio/base_r:1.0.4
     setup:
       - type: r
         packages: proxyC
+runners:
+  - type: executable
   - type: nextflow
     directives:
       label: [ midtime, highmem, highcpu ]
\ No newline at end of file
diff --git a/src/metrics/mean_rowwise_error/config.vsh.yaml b/src/metrics/mean_rowwise_error/config.vsh.yaml
index 15d8317d..d1589a1a 100644
--- a/src/metrics/mean_rowwise_error/config.vsh.yaml
+++ b/src/metrics/mean_rowwise_error/config.vsh.yaml
@@ -1,49 +1,50 @@
 __merge__: ../../api/comp_metric.yaml
-functionality:
-  name: mean_rowwise_error
-  info:
-    metrics:
-      - name: mean_rowwise_rmse
-        label: Mean Rowwise RMSE
-        summary: The mean of the root mean squared error (RMSE) of each row in the matrix.
-        description: |
-          We use the **Mean Rowwise Root Mean Squared Error** to score submissions, computed as follows:
-
-          $$
-          \textrm{MRRMSE} = \frac{1}{R}\sum_{i=1}^R\left(\frac{1}{n} \sum_{j=1}^{n} (y_{ij} - \widehat{y}_{ij})^2\right)^{1/2}
-          $$
-
-          where $(R)$ is the number of scored rows, and $(y_{ij})$ and $(\widehat{y}_{ij})$ are the actual and predicted values, respectively, for row $(i)$ and column $(j)$, and $(n)$ bis the number of columns.
-        repository_url: null
-        documentation_url: null
-        min: 0
-        max: "+inf"
-        maximize: false
-      - name: mean_rowwise_mae
-        label: Mean Rowwise MAE
-        summary: The mean of the absolute error (MAE) of each row in the matrix.
-        description: |
-          We use the **Mean Rowwise Absolute Error** to score submissions, computed as follows:
-
-          $$
-          \textrm{MRMAE} = \frac{1}{R}\sum_{i=1}^R\left(\frac{1}{n} \sum_{j=1}^{n} |y_{ij} - \widehat{y}_{ij}|\right)
-          $$
-
-          where $(R)$ is the number of scored rows, and $(y_{ij})$ and $(\widehat{y}_{ij})$ are the actual and predicted values, respectively, for row $(i)$ and column $(j)$, and $(n)$ bis the number of columns.
+name: mean_rowwise_error
+info:
+  metrics:
+    - name: mean_rowwise_rmse
+      label: Mean Rowwise RMSE
+      summary: The mean of the root mean squared error (RMSE) of each row in the matrix.
+      description: |
+        We use the **Mean Rowwise Root Mean Squared Error** to score submissions, computed as follows:
+
+        $$
+        \textrm{MRRMSE} = \frac{1}{R}\sum_{i=1}^R\left(\frac{1}{n} \sum_{j=1}^{n} (y_{ij} - \widehat{y}_{ij})^2\right)^{1/2}
+        $$
+
+        where $(R)$ is the number of scored rows, and $(y_{ij})$ and $(\widehat{y}_{ij})$ are the actual and predicted values, respectively, for row $(i)$ and column $(j)$, and $(n)$ is the number of columns.
+      repository_url: null
+      documentation_url: null
+      min: 0
+      max: "+inf"
+      maximize: false
+    - name: mean_rowwise_mae
+      label: Mean Rowwise MAE
+      summary: The mean of the absolute error (MAE) of each row in the matrix.
+      description: |
+        We use the **Mean Rowwise Absolute Error** to score submissions, computed as follows:
+
+        $$
+        \textrm{MRMAE} = \frac{1}{R}\sum_{i=1}^R\left(\frac{1}{n} \sum_{j=1}^{n} |y_{ij} - \widehat{y}_{ij}|\right)
+        $$
+
+        where $(R)$ is the number of scored rows, and $(y_{ij})$ and $(\widehat{y}_{ij})$ are the actual and predicted values, respectively, for row $(i)$ and column $(j)$, and $(n)$ is the number of columns.
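As with the correlation metrics, the shipped implementation is an R script, but the two error metrics defined in this file reduce to a couple of lines of NumPy each. A sketch for reference (function names hypothetical):

```python
# Mean Rowwise RMSE and Mean Rowwise MAE, as defined above.
import numpy as np

def mrrmse(y, y_hat):
    # Root-mean-square error per row, then averaged over rows.
    return np.mean(np.sqrt(np.mean((y - y_hat) ** 2, axis=1)))

def mrmae(y, y_hat):
    # Mean absolute error per row, then averaged over rows.
    return np.mean(np.mean(np.abs(y - y_hat), axis=1))
```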
- repository_url: null - documentation_url: null - min: 0 - max: "+inf" - maximize: false - resources: - - type: r_script - path: script.R -platforms: + repository_url: null + documentation_url: null + min: 0 + max: "+inf" + maximize: false +resources: + - type: r_script + path: script.R +engines: - type: docker image: ghcr.io/openproblems-bio/base_r:1.0.4 setup: - type: r packages: proxyC +runners: + - type: executable - type: nextflow directives: label: [ midtime, highmem, highcpu ] \ No newline at end of file diff --git a/src/process_dataset/add_uns_metadata/config.vsh.yaml b/src/process_dataset/add_uns_metadata/config.vsh.yaml index 6244a417..2a9a95f6 100644 --- a/src/process_dataset/add_uns_metadata/config.vsh.yaml +++ b/src/process_dataset/add_uns_metadata/config.vsh.yaml @@ -1,58 +1,59 @@ -functionality: - name: add_uns_metadata - namespace: "process_dataset" - info: - type: process_dataset - type_info: - label: Add metadata - summary: Add metadata to the pseudobulked data - description: | - Add metadata to the pseudobulked single-cell dataset for the perturbation regression task. - arguments: - - name: --input - type: file - required: true - direction: input - example: resources/neurips-2023-raw/pseudobulk_cleaned.h5ad - - name: "--dataset_id" - type: string - description: Unique identifier of the dataset. - required: true - - name: "--dataset_name" - type: string - description: Nicely formatted name. - required: true - - name: "--dataset_url" - type: string - description: Link to the original source of the dataset. - required: false - - name: "--dataset_reference" - type: string - description: Bibtex reference of the paper in which the dataset was published. - required: false - - name: "--dataset_summary" - type: string - description: Short description of the dataset. - required: true - - name: "--dataset_description" - type: string - description: Long description of the dataset. - required: true - - name: "--dataset_organism" - type: string - description: The organism of the dataset. - required: true - - name: --output - type: file - required: true - direction: output - example: resources/neurips-2023-data/pseudobulk_uns.h5ad - resources: - - type: python_script - path: script.py -platforms: +name: add_uns_metadata +namespace: "process_dataset" +info: + type: process_dataset + type_info: + label: Add metadata + summary: Add metadata to the pseudobulked data + description: | + Add metadata to the pseudobulked single-cell dataset for the perturbation regression task. +arguments: + - name: --input + type: file + required: true + direction: input + example: resources/neurips-2023-raw/pseudobulk_cleaned.h5ad + - name: "--dataset_id" + type: string + description: Unique identifier of the dataset. + required: true + - name: "--dataset_name" + type: string + description: Nicely formatted name. + required: true + - name: "--dataset_url" + type: string + description: Link to the original source of the dataset. + required: false + - name: "--dataset_reference" + type: string + description: Bibtex reference of the paper in which the dataset was published. + required: false + - name: "--dataset_summary" + type: string + description: Short description of the dataset. + required: true + - name: "--dataset_description" + type: string + description: Long description of the dataset. + required: true + - name: "--dataset_organism" + type: string + description: The organism of the dataset. 
+ required: true + - name: --output + type: file + required: true + direction: output + example: resources/neurips-2023-data/pseudobulk_uns.h5ad +resources: + - type: python_script + path: script.py +engines: - type: docker image: ghcr.io/openproblems-bio/base_python:1.0.4 +runners: + - type: executable - type: nextflow directives: label: [ midtime, midmem, lowcpu ] diff --git a/src/process_dataset/bootstrap/config.vsh.yaml b/src/process_dataset/bootstrap/config.vsh.yaml index 391ddca6..70c8794c 100644 --- a/src/process_dataset/bootstrap/config.vsh.yaml +++ b/src/process_dataset/bootstrap/config.vsh.yaml @@ -1,61 +1,62 @@ -functionality: - name: bootstrap - namespace: "process_dataset" - info: - type: process_dataset - type_info: - label: Bootstrap - summary: Bootstrap a dataset - description: | - This component bootstraps a dataset. - argument_groups: - - name: Inputs - arguments: - - name: --input +name: bootstrap +namespace: "process_dataset" +info: + type: process_dataset + type_info: + label: Bootstrap + summary: Bootstrap a dataset + description: | + This component bootstraps a dataset. +argument_groups: + - name: Inputs + arguments: + - name: --input + type: file + required: true + direction: input + example: resources/neurips-2023-raw/sc_counts_reannotated_with_counts.h5ad + - name: Outputs + arguments: + - name: --output type: file required: true - direction: input - example: resources/neurips-2023-raw/sc_counts_reannotated_with_counts.h5ad - - name: Outputs - arguments: - - name: --output - type: file - required: true - direction: output - example: sc_counts_bootstrap.h5ad - - name: Sampling parameters - description: Parameters for sampling the bootstraps. - arguments: - - name: --bootstrap_obs - type: boolean - default: true - description: Whether to sample observations. - - name: --obs_fraction - type: double - default: 1 - description: Fraction of the obs of the sc_counts to include in each bootstrap. - - name: --obs_replace - type: boolean - default: true - description: Whether to sample with replacement. - - name: --bootstrap_var - type: boolean - default: false - description: Whether to sample variables. - - name: --var_fraction - type: double - default: 1 - description: Fraction of the var of the sc_counts to include in each bootstrap. - - name: --var_replace - type: boolean - default: true - description: Whether to sample with replacement. - resources: - - type: python_script - path: script.py -platforms: + direction: output + example: sc_counts_bootstrap.h5ad + - name: Sampling parameters + description: Parameters for sampling the bootstraps. + arguments: + - name: --bootstrap_obs + type: boolean + default: true + description: Whether to sample observations. + - name: --obs_fraction + type: double + default: 1 + description: Fraction of the obs of the sc_counts to include in each bootstrap. + - name: --obs_replace + type: boolean + default: true + description: Whether to sample with replacement. + - name: --bootstrap_var + type: boolean + default: false + description: Whether to sample variables. + - name: --var_fraction + type: double + default: 1 + description: Fraction of the var of the sc_counts to include in each bootstrap. + - name: --var_replace + type: boolean + default: true + description: Whether to sample with replacement. 
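The sampling semantics these arguments describe can be sketched as follows. The actual logic lives in script.py; this AnnData/NumPy version is only illustrative, and details such as the rounding of fractions are assumptions:

```python
# Hypothetical sketch of one bootstrap draw over observations and variables.
import anndata as ad
import numpy as np

def bootstrap_sample(adata, bootstrap_obs=True, obs_fraction=1.0, obs_replace=True,
                     bootstrap_var=False, var_fraction=1.0, var_replace=True, seed=0):
    rng = np.random.default_rng(seed)
    obs_idx = (rng.choice(adata.n_obs, int(adata.n_obs * obs_fraction), replace=obs_replace)
               if bootstrap_obs else np.arange(adata.n_obs))
    var_idx = (rng.choice(adata.n_vars, int(adata.n_vars * var_fraction), replace=var_replace)
               if bootstrap_var else np.arange(adata.n_vars))
    return adata[obs_idx, :][:, var_idx].copy()
```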
+resources: + - type: python_script + path: script.py +engines: - type: docker image: ghcr.io/openproblems-bio/base_python:1.0.4 +runners: + - type: executable - type: nextflow directives: label: [ midtime, highmem, midcpu ] diff --git a/src/process_dataset/compute_pseudobulk/config.vsh.yaml b/src/process_dataset/compute_pseudobulk/config.vsh.yaml index d8cd9aaf..23c61213 100644 --- a/src/process_dataset/compute_pseudobulk/config.vsh.yaml +++ b/src/process_dataset/compute_pseudobulk/config.vsh.yaml @@ -1,33 +1,34 @@ -functionality: - name: compute_pseudobulk - namespace: "process_dataset" - info: - type: process_dataset - type_info: - label: Pseudobulk - summary: Compute pseudobulk data - description: | - Compute pseudobulk data for the perturbation regression task. - arguments: - - name: --input - type: file - required: true - direction: input - example: resources/neurips-2023-raw/sc_counts.h5ad - - name: --output - type: file - required: true - direction: output - example: resources/neurips-2023-data/pseudobulk.h5ad - resources: - - type: python_script - path: script.py -platforms: +name: compute_pseudobulk +namespace: "process_dataset" +info: + type: process_dataset + type_info: + label: Pseudobulk + summary: Compute pseudobulk data + description: | + Compute pseudobulk data for the perturbation regression task. +arguments: + - name: --input + type: file + required: true + direction: input + example: resources/neurips-2023-raw/sc_counts.h5ad + - name: --output + type: file + required: true + direction: output + example: resources/neurips-2023-data/pseudobulk.h5ad +resources: + - type: python_script + path: script.py +engines: - type: docker image: ghcr.io/openproblems-bio/base_python:1.0.4 setup: - type: python packages: [ pyarrow ] +runners: + - type: executable - type: nextflow directives: label: [ midtime, midmem, lowcpu ] diff --git a/src/process_dataset/convert_h5ad_to_parquet/config.vsh.yaml b/src/process_dataset/convert_h5ad_to_parquet/config.vsh.yaml index 2a703694..952bdcec 100644 --- a/src/process_dataset/convert_h5ad_to_parquet/config.vsh.yaml +++ b/src/process_dataset/convert_h5ad_to_parquet/config.vsh.yaml @@ -1,49 +1,50 @@ -functionality: - name: convert_h5ad_to_parquet - namespace: "process_dataset" - info: - type: process_dataset - type_info: - label: Split dataset - summary: Split dataset into training and test parquet files - description: | - Split dataset into training and test parquet files. - arguments: - - name: --input_train - type: file - required: true - direction: input - example: resources/neurips-2023-data/de_train.h5ad - - name: --input_test - type: file - required: true - direction: input - example: resources/neurips-2023-data/de_test.h5ad - - name: --output_train - type: file - required: true - direction: output - example: resources/neurips-2023-data/de_train.parquet - - name: --output_test - type: file - required: true - direction: output - example: resources/neurips-2023-data/de_test.parquet - - name: --output_id_map - type: file - required: true - direction: output - example: resources/neurips-2023-data/id_map.csv - resources: - - type: python_script - path: script.py - - path: ../../utils/anndata_to_dataframe.py -platforms: +name: convert_h5ad_to_parquet +namespace: "process_dataset" +info: + type: process_dataset + type_info: + label: Split dataset + summary: Split dataset into training and test parquet files + description: | + Split dataset into training and test parquet files. 
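Conceptually, the conversion whose arguments follow flattens an AnnData layer into a wide data frame and writes it to parquet. The real component delegates to the anndata_to_dataframe.py helper listed in its resources; the sketch below assumes a dense layer and a hypothetical column layout:

```python
# Rough sketch of an h5ad -> parquet conversion for the training split.
import anndata as ad
import pandas as pd

adata = ad.read_h5ad("de_train.h5ad")
# One column per gene, one row per observation; assumes the layer is dense
# and uses the benchmark's default layer name.
values = pd.DataFrame(adata.layers["clipped_sign_log10_pval"],
                      columns=adata.var_names)
df = pd.concat([adata.obs.reset_index(drop=True), values], axis=1)
df.to_parquet("de_train.parquet", engine="fastparquet")
```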
+arguments: + - name: --input_train + type: file + required: true + direction: input + example: resources/neurips-2023-data/de_train.h5ad + - name: --input_test + type: file + required: true + direction: input + example: resources/neurips-2023-data/de_test.h5ad + - name: --output_train + type: file + required: true + direction: output + example: resources/neurips-2023-data/de_train.parquet + - name: --output_test + type: file + required: true + direction: output + example: resources/neurips-2023-data/de_test.parquet + - name: --output_id_map + type: file + required: true + direction: output + example: resources/neurips-2023-data/id_map.csv +resources: + - type: python_script + path: script.py + - path: ../../utils/anndata_to_dataframe.py +engines: - type: docker image: ghcr.io/openproblems-bio/base_python:1.0.4 setup: - type: python packages: [ fastparquet, anndata, pandas ] +runners: + - type: executable - type: nextflow directives: label: [ midtime, midmem, lowcpu ] diff --git a/src/process_dataset/convert_kaggle_h5ad_to_parquet/config.vsh.yaml b/src/process_dataset/convert_kaggle_h5ad_to_parquet/config.vsh.yaml index 858a7060..80f48205 100644 --- a/src/process_dataset/convert_kaggle_h5ad_to_parquet/config.vsh.yaml +++ b/src/process_dataset/convert_kaggle_h5ad_to_parquet/config.vsh.yaml @@ -1,87 +1,88 @@ -functionality: - name: convert_kaggle_h5ad_to_parquet - namespace: "process_dataset" - info: - type: process_dataset - type_info: - label: H5AD to Parquet - summary: Convert Kaggle h5ad to parquet - description: | - Convert dataset from h5ad files into training and test parquet files. - argument_groups: - - name: Inputs - arguments: - - name: --input_train +name: convert_kaggle_h5ad_to_parquet +namespace: "process_dataset" +info: + type: process_dataset + type_info: + label: H5AD to Parquet + summary: Convert Kaggle h5ad to parquet + description: | + Convert dataset from h5ad files into training and test parquet files. +argument_groups: + - name: Inputs + arguments: + - name: --input_train + type: file + required: true + direction: input + example: resources/neurips-2023-kaggle/12_de_by_cell_type_train.h5ad + - name: --input_test + type: file + required: true + direction: input + example: resources/neurips-2023-kaggle/12_de_by_cell_type_test.h5ad + - name: --input_single_cell_h5ad + type: file + required: true + direction: input + example: resources/neurips-2023-raw/sc_counts.h5ad + - name: Metadata + arguments: + - name: "--dataset_id" + type: string + description: Unique identifier of the dataset. + required: true + - name: "--dataset_name" + type: string + description: Nicely formatted name. + required: true + - name: "--dataset_url" + type: string + description: Link to the original source of the dataset. + required: false + - name: "--dataset_reference" + type: string + description: Bibtex reference of the paper in which the dataset was published. + required: false + - name: "--dataset_summary" + type: string + description: Short description of the dataset. + required: true + - name: "--dataset_description" + type: string + description: Long description of the dataset. + required: true + - name: "--dataset_organism" + type: string + description: The organism of the dataset. 
+ required: true + - name: Outputs + arguments: + - name: --output_train_h5ad type: file required: true - direction: input - example: resources/neurips-2023-kaggle/12_de_by_cell_type_train.h5ad - - name: --input_test + direction: output + example: resources/neurips-2023-kaggle/de_train.h5ad + - name: --output_test_h5ad type: file required: true - direction: input - example: resources/neurips-2023-kaggle/12_de_by_cell_type_test.h5ad - - name: --input_single_cell_h5ad + direction: output + example: resources/neurips-2023-kaggle/de_test.h5ad + - name: --output_id_map type: file required: true - direction: input - example: resources/neurips-2023-raw/sc_counts.h5ad - - name: Metadata - arguments: - - name: "--dataset_id" - type: string - description: Unique identifier of the dataset. - required: true - - name: "--dataset_name" - type: string - description: Nicely formatted name. - required: true - - name: "--dataset_url" - type: string - description: Link to the original source of the dataset. - required: false - - name: "--dataset_reference" - type: string - description: Bibtex reference of the paper in which the dataset was published. - required: false - - name: "--dataset_summary" - type: string - description: Short description of the dataset. - required: true - - name: "--dataset_description" - type: string - description: Long description of the dataset. - required: true - - name: "--dataset_organism" - type: string - description: The organism of the dataset. - required: true - - name: Outputs - arguments: - - name: --output_train_h5ad - type: file - required: true - direction: output - example: resources/neurips-2023-kaggle/de_train.h5ad - - name: --output_test_h5ad - type: file - required: true - direction: output - example: resources/neurips-2023-kaggle/de_test.h5ad - - name: --output_id_map - type: file - required: true - direction: output - example: resources/neurips-2023-kaggle/id_map.csv - resources: - - type: python_script - path: script.py -platforms: + direction: output + example: resources/neurips-2023-kaggle/id_map.csv +resources: + - type: python_script + path: script.py +engines: - type: docker image: ghcr.io/openproblems-bio/base_python:1.0.4 setup: - type: python packages: [ fastparquet, anndata, pandas ] +runners: + - type: executable - type: nextflow directives: label: [ midtime, midmem, lowcpu ] diff --git a/src/process_dataset/filter_obs/config.vsh.yaml b/src/process_dataset/filter_obs/config.vsh.yaml index 831ce031..d47e17a4 100644 --- a/src/process_dataset/filter_obs/config.vsh.yaml +++ b/src/process_dataset/filter_obs/config.vsh.yaml @@ -1,33 +1,34 @@ -functionality: - name: filter_obs - namespace: "process_dataset" - info: - type: process_dataset - type_info: - label: Filter observations - summary: Remove low-quality observations from the dataset - description: | - This task removes low-quality observations from the dataset. - arguments: - - name: --input - type: file - required: true - direction: input - example: resources/neurips-2023-raw/sc_counts.h5ad - - name: --output - type: file - required: true - direction: output - example: resources/neurips-2023-data/sc_counts_cleaned.h5ad - resources: - - type: r_script - path: script.R -platforms: +name: filter_obs +namespace: "process_dataset" +info: + type: process_dataset + type_info: + label: Filter observations + summary: Remove low-quality observations from the dataset + description: | + This task removes low-quality observations from the dataset. 
+arguments: + - name: --input + type: file + required: true + direction: input + example: resources/neurips-2023-raw/sc_counts.h5ad + - name: --output + type: file + required: true + direction: output + example: resources/neurips-2023-data/sc_counts_cleaned.h5ad +resources: + - type: r_script + path: script.R +engines: - type: docker image: ghcr.io/openproblems-bio/base_r:1.0.4 setup: - type: r cran: [ dplyr, tidyr, purrr, tibble ] +runners: + - type: executable - type: nextflow directives: label: [ midtime, midmem, lowcpu ] diff --git a/src/process_dataset/filter_vars/config.vsh.yaml b/src/process_dataset/filter_vars/config.vsh.yaml index 56260b55..c6eb6ea3 100644 --- a/src/process_dataset/filter_vars/config.vsh.yaml +++ b/src/process_dataset/filter_vars/config.vsh.yaml @@ -1,33 +1,34 @@ -functionality: - name: filter_vars - namespace: "process_dataset" - info: - type: process_dataset - type_info: - label: Filter variables - summary: Remove low-quality variables from the dataset - description: | - This task removes low-quality variables from the dataset. - arguments: - - name: --input - type: file - required: true - direction: input - example: resources/neurips-2023-raw/pseudobulk.h5ad - - name: --output - type: file - required: true - direction: output - example: resources/neurips-2023-data/pseudobulk_cleaned.h5ad - resources: - - type: r_script - path: script.R -platforms: +name: filter_vars +namespace: "process_dataset" +info: + type: process_dataset + type_info: + label: Filter variables + summary: Remove low-quality variables from the dataset + description: | + This task removes low-quality variables from the dataset. +arguments: + - name: --input + type: file + required: true + direction: input + example: resources/neurips-2023-raw/pseudobulk.h5ad + - name: --output + type: file + required: true + direction: output + example: resources/neurips-2023-data/pseudobulk_cleaned.h5ad +resources: + - type: r_script + path: script.R +engines: - type: docker image: ghcr.io/openproblems-bio/base_r:1.0.4 setup: - type: r cran: [ edgeR, limma, dplyr, tidyr, purrr, tibble ] +runners: + - type: executable - type: nextflow directives: label: [ midtime, midmem, lowcpu ] diff --git a/src/process_dataset/generate_id_map/config.vsh.yaml b/src/process_dataset/generate_id_map/config.vsh.yaml index 7669b606..97a9af95 100644 --- a/src/process_dataset/generate_id_map/config.vsh.yaml +++ b/src/process_dataset/generate_id_map/config.vsh.yaml @@ -1,33 +1,34 @@ -functionality: - name: generate_id_map - namespace: "process_dataset" - info: - type: process_dataset - type_info: - label: Generate ID map - summary: Generate the ID map file for competitors - description: | - This task generates the ID map file for competitors. - arguments: - - name: --de_test_h5ad - type: file - required: true - direction: input - example: resources/neurips-2023-data/de_test.h5ad - - name: --id_map - type: file - required: true - direction: output - example: resources/neurips-2023-data/id_map.csv - resources: - - type: python_script - path: script.py -platforms: +name: generate_id_map +namespace: "process_dataset" +info: + type: process_dataset + type_info: + label: Generate ID map + summary: Generate the ID map file for competitors + description: | + This task generates the ID map file for competitors. 
+arguments: + - name: --de_test_h5ad + type: file + required: true + direction: input + example: resources/neurips-2023-data/de_test.h5ad + - name: --id_map + type: file + required: true + direction: output + example: resources/neurips-2023-data/id_map.csv +resources: + - type: python_script + path: script.py +engines: - type: docker image: ghcr.io/openproblems-bio/base_python:1.0.4 setup: - type: python packages: [ anndata ] +runners: + - type: executable - type: nextflow directives: label: [ midtime, midmem, lowcpu ] diff --git a/src/process_dataset/run_limma/config.vsh.yaml b/src/process_dataset/run_limma/config.vsh.yaml index cd84c852..2730a0be 100644 --- a/src/process_dataset/run_limma/config.vsh.yaml +++ b/src/process_dataset/run_limma/config.vsh.yaml @@ -1,55 +1,56 @@ -functionality: - name: run_limma - namespace: "process_dataset" - info: - type: process_dataset - type_info: - label: Limma - summary: Run limma - description: | - Run limma for the perturbation regression task. - arguments: - - name: --input - type: file - required: true - direction: input - example: resources/neurips-2023-data/pseudobulk_cleaned.h5ad - - name: --input_splits - type: string - multiple: true - description: The splits to use for the limma fitting - example: [ train, control, public_test, private_test ] - - name: --output - type: file - required: true - direction: output - example: resources/neurips-2023-data/de.h5ad - - name: --output_splits - type: string - multiple: true - description: The splits to use for DE analysis - - name: --de_sig_cutoff - type: double - required: false - default: 0.05 - - name: --clipping_cutoff - type: double - required: false - default: 0.0001 - description: Clip the log p-values between log10(clip) and -log10(clip) - - name: --control_compound - type: string - required: false - default: "Dimethyl Sulfoxide" - resources: - - type: r_script - path: script.R -platforms: +name: run_limma +namespace: "process_dataset" +info: + type: process_dataset + type_info: + label: Limma + summary: Run limma + description: | + Run limma for the perturbation regression task. 
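The layer name `clipped_sign_log10_pval` used throughout this benchmark hints at what this component produces: signed, clipped log10 p-values, with the clip controlled by the `--clipping_cutoff` argument defined below. An illustrative NumPy sketch (the function name is hypothetical; the actual transformation is implemented in the component's R script):

```python
# Signed -log10(p) scores, clamped to [log10(clip), -log10(clip)].
import numpy as np

def clipped_sign_log10_pval(pval, logfc, clip=1e-4):
    # Clip p-values to [clip, 1] before the log transform so the scores
    # cannot exceed -log10(clip) in magnitude (4 for the default 1e-4).
    return -np.log10(np.clip(pval, clip, 1.0)) * np.sign(logfc)
```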
+arguments: + - name: --input + type: file + required: true + direction: input + example: resources/neurips-2023-data/pseudobulk_cleaned.h5ad + - name: --input_splits + type: string + multiple: true + description: The splits to use for the limma fitting + example: [ train, control, public_test, private_test ] + - name: --output + type: file + required: true + direction: output + example: resources/neurips-2023-data/de.h5ad + - name: --output_splits + type: string + multiple: true + description: The splits to use for DE analysis + - name: --de_sig_cutoff + type: double + required: false + default: 0.05 + - name: --clipping_cutoff + type: double + required: false + default: 0.0001 + description: Clip the log p-values between log10(clip) and -log10(clip) + - name: --control_compound + type: string + required: false + default: "Dimethyl Sulfoxide" +resources: + - type: r_script + path: script.R +engines: - type: docker image: ghcr.io/openproblems-bio/base_r:1.0.4 setup: - type: r bioc: [ edgeR, limma, dplyr, tidyr, purrr, tibble, furrr, future ] +runners: + - type: executable - type: nextflow directives: label: [ midtime, midmem, highcpu ] diff --git a/src/workflows/process_dataset/config.vsh.yaml b/src/workflows/process_dataset/config.vsh.yaml index 39c5332b..3d30a762 100644 --- a/src/workflows/process_dataset/config.vsh.yaml +++ b/src/workflows/process_dataset/config.vsh.yaml @@ -1,48 +1,48 @@ __merge__: ../../api/comp_process_dataset.yaml -functionality: - name: process_dataset - namespace: "workflows" - arguments: - - name: "--dataset_id" - type: string - description: Unique identifier of the dataset. - required: true - - name: "--dataset_name" - type: string - description: Nicely formatted name. - required: true - - name: "--dataset_url" - type: string - description: Link to the original source of the dataset. - required: false - - name: "--dataset_reference" - type: string - description: Bibtex reference of the paper in which the dataset was published. - required: false - - name: "--dataset_summary" - type: string - description: Short description of the dataset. - required: true - - name: "--dataset_description" - type: string - description: Long description of the dataset. - required: true - - name: "--dataset_organism" - type: string - description: The organism of the dataset. - required: true - resources: - - type: nextflow_script - path: main.nf - entrypoint: run_wf - dependencies: - - name: process_dataset/compute_pseudobulk - - name: process_dataset/filter_obs - - name: process_dataset/filter_vars - - name: process_dataset/add_uns_metadata - - name: process_dataset/run_limma - - name: process_dataset/generate_id_map -platforms: +name: process_dataset +namespace: "workflows" +arguments: + - name: "--dataset_id" + type: string + description: Unique identifier of the dataset. + required: true + - name: "--dataset_name" + type: string + description: Nicely formatted name. + required: true + - name: "--dataset_url" + type: string + description: Link to the original source of the dataset. + required: false + - name: "--dataset_reference" + type: string + description: Bibtex reference of the paper in which the dataset was published. + required: false + - name: "--dataset_summary" + type: string + description: Short description of the dataset. + required: true + - name: "--dataset_description" + type: string + description: Long description of the dataset. + required: true + - name: "--dataset_organism" + type: string + description: The organism of the dataset. 
+ required: true +resources: + - type: nextflow_script + path: main.nf + entrypoint: run_wf +dependencies: + - name: process_dataset/compute_pseudobulk + - name: process_dataset/filter_obs + - name: process_dataset/filter_vars + - name: process_dataset/add_uns_metadata + - name: process_dataset/run_limma + - name: process_dataset/generate_id_map +runners: + - type: executable - type: nextflow directives: label: [ midtime, midmem, lowcpu ] diff --git a/src/workflows/run_benchmark/config.vsh.yaml b/src/workflows/run_benchmark/config.vsh.yaml index 56171aba..0523f5dd 100644 --- a/src/workflows/run_benchmark/config.vsh.yaml +++ b/src/workflows/run_benchmark/config.vsh.yaml @@ -1,93 +1,93 @@ -functionality: - name: "run_benchmark" - namespace: "workflows" - argument_groups: - - name: Inputs - arguments: - - name: "--de_train_h5ad" - __merge__: ../../api/file_de_train_h5ad.yaml - required: true - direction: input - - name: "--de_test_h5ad" - __merge__: ../../api/file_de_test_h5ad.yaml - required: true - direction: input - - name: "--id_map" - __merge__: ../../api/file_id_map.yaml - required: true - direction: input - - name: --layer - type: string - direction: input - default: clipped_sign_log10_pval - description: Which layer to use for prediction and evaluation. - - name: Outputs - arguments: - - name: "--scores" - type: file - required: true - direction: output - description: A yaml file containing the scores of each of the methods - default: score_uns.yaml - - name: "--method_configs" - type: file - required: true - direction: output - default: method_configs.yaml - - name: "--metric_configs" - type: file - required: true - direction: output - default: metric_configs.yaml - - name: "--dataset_uns" - type: file - required: true - direction: output - default: dataset_uns.yaml - - name: "--task_info" - type: file - required: true - direction: output - default: task_info.yaml - - name: Arguments - arguments: - - name: "--method_ids" - type: string - multiple: true - description: A list of method ids to run. If not specified, all methods will be run. - - name: "--metric_ids" - type: string - multiple: true - description: A list of metric ids to run. If not specified, all metric will be run. 
-  resources:
-    - type: nextflow_script
-      path: main.nf
-      entrypoint: run_wf
-    - type: file
-      path: "../../api/task_info.yaml"
-  dependencies:
-    - name: common/extract_metadata
-      repository: openproblemsv2
-    - name: control_methods/zeros
-    - name: control_methods/sample
-    - name: control_methods/ground_truth
-    - name: control_methods/mean_outcome
-    - name: control_methods/mean_across_celltypes
-    - name: control_methods/mean_across_compounds
-    - name: methods/nn_retraining_with_pseudolabels
-    - name: methods/scape
-    - name: methods/jn_ap_op2
-    - name: methods/lgc_ensemble
-    - name: methods/transformer_ensemble
-    - name: methods/pyboost
-    - name: metrics/mean_rowwise_error
-    - name: metrics/mean_rowwise_correlation
-  repositories:
-    - name: openproblemsv2
-      type: github
-      repo: openproblems-bio/openproblems-v2
-      tag: main_build
-platforms:
+name: "run_benchmark"
+namespace: "workflows"
+argument_groups:
+  - name: Inputs
+    arguments:
+      - name: "--de_train_h5ad"
+        __merge__: ../../api/file_de_train_h5ad.yaml
+        required: true
+        direction: input
+      - name: "--de_test_h5ad"
+        __merge__: ../../api/file_de_test_h5ad.yaml
+        required: true
+        direction: input
+      - name: "--id_map"
+        __merge__: ../../api/file_id_map.yaml
+        required: true
+        direction: input
+      - name: --layer
+        type: string
+        direction: input
+        default: clipped_sign_log10_pval
+        description: Which layer to use for prediction and evaluation.
+  - name: Outputs
+    arguments:
+      - name: "--scores"
+        type: file
+        required: true
+        direction: output
+        description: A YAML file containing the scores of each of the methods.
+        default: score_uns.yaml
+      - name: "--method_configs"
+        type: file
+        required: true
+        direction: output
+        default: method_configs.yaml
+      - name: "--metric_configs"
+        type: file
+        required: true
+        direction: output
+        default: metric_configs.yaml
+      - name: "--dataset_uns"
+        type: file
+        required: true
+        direction: output
+        default: dataset_uns.yaml
+      - name: "--task_info"
+        type: file
+        required: true
+        direction: output
+        default: task_info.yaml
+  - name: Arguments
+    arguments:
+      - name: "--method_ids"
+        type: string
+        multiple: true
+        description: A list of method ids to run. If not specified, all methods will be run.
+      - name: "--metric_ids"
+        type: string
+        multiple: true
+        description: A list of metric ids to run. If not specified, all metrics will be run.
+resources: + - type: nextflow_script + path: main.nf + entrypoint: run_wf + - type: file + path: "../../api/task_info.yaml" +dependencies: + - name: common/extract_metadata + repository: openproblemsv2 + - name: control_methods/zeros + - name: control_methods/sample + - name: control_methods/ground_truth + - name: control_methods/mean_outcome + - name: control_methods/mean_across_celltypes + - name: control_methods/mean_across_compounds + - name: methods/nn_retraining_with_pseudolabels + - name: methods/scape + - name: methods/jn_ap_op2 + - name: methods/lgc_ensemble + - name: methods/transformer_ensemble + - name: methods/pyboost + - name: metrics/mean_rowwise_error + - name: metrics/mean_rowwise_correlation +repositories: + - name: openproblemsv2 + type: github + repo: openproblems-bio/openproblems-v2 + tag: main_build +runners: + - type: executable - type: nextflow config: script: | diff --git a/src/workflows/run_stability_analysis/config.vsh.yaml b/src/workflows/run_stability_analysis/config.vsh.yaml index 05a7d6fa..3e50d3f6 100644 --- a/src/workflows/run_stability_analysis/config.vsh.yaml +++ b/src/workflows/run_stability_analysis/config.vsh.yaml @@ -1,87 +1,87 @@ -functionality: - name: "run_stability_analysis" - namespace: "workflows" - argument_groups: - - name: Inputs - arguments: - - name: --sc_counts - __merge__: ../../api/file_sc_counts.yaml - required: true - direction: input - - name: "--id" - type: string - description: Unique identifier of the dataset. - required: true - - name: --layer - type: string - direction: input - default: clipped_sign_log10_pval - description: Which layer to use for prediction and evaluation. - - name: Bootstrapping arguments - description: Define the sampling strategy for the stability analysis. - arguments: - - name: --bootstrap_num_replicates - type: integer - default: 10 - description: Number of bootstrap replicates to run. - - name: --bootstrap_obs - type: boolean - default: true - description: Whether to sample observations. - - name: --bootstrap_obs_fraction - type: double - default: 1 - description: Fraction of the obs of the sc_counts to include in each bootstrap. - - name: --bootstrap_obs_replace - type: boolean - default: true - description: Whether to sample with replacement. - - name: --bootstrap_var - type: boolean - default: false - description: Whether to sample variables. - - name: --bootstrap_var_fraction - type: double - default: 1 - description: Fraction of the var of the sc_counts to include in each bootstrap. - - name: --bootstrap_var_replace - type: boolean - default: true - description: Whether to sample with replacement. - - name: Outputs - arguments: - - name: "--scores" - type: file - required: true - direction: output - description: A yaml file containing the scores of each of the methods - default: stability_uns.yaml - - name: Arguments - arguments: - - name: "--method_ids" - type: string - multiple: true - description: A list of method ids to run. If not specified, all methods will be run. - - name: "--metric_ids" - type: string - multiple: true - description: A list of metric ids to run. If not specified, all metric will be run. 
-  resources:
-    - type: nextflow_script
-      path: main.nf
-      entrypoint: run_wf
-    - type: file
-      path: "../../api/task_info.yaml"
-  dependencies:
-    - name: process_dataset/bootstrap
-    - name: workflows/process_dataset
-    - name: workflows/run_benchmark
-  repositories:
-    - name: openproblemsv2
-      type: github
-      repo: openproblems-bio/openproblems-v2
-      tag: main_build
-platforms:
+name: "run_stability_analysis"
+namespace: "workflows"
+argument_groups:
+  - name: Inputs
+    arguments:
+      - name: --sc_counts
+        __merge__: ../../api/file_sc_counts.yaml
+        required: true
+        direction: input
+      - name: "--id"
+        type: string
+        description: Unique identifier of the dataset.
+        required: true
+      - name: --layer
+        type: string
+        direction: input
+        default: clipped_sign_log10_pval
+        description: Which layer to use for prediction and evaluation.
+  - name: Bootstrapping arguments
+    description: Define the sampling strategy for the stability analysis.
+    arguments:
+      - name: --bootstrap_num_replicates
+        type: integer
+        default: 10
+        description: Number of bootstrap replicates to run.
+      - name: --bootstrap_obs
+        type: boolean
+        default: true
+        description: Whether to sample observations.
+      - name: --bootstrap_obs_fraction
+        type: double
+        default: 1
+        description: Fraction of the obs of the sc_counts to include in each bootstrap.
+      - name: --bootstrap_obs_replace
+        type: boolean
+        default: true
+        description: Whether to sample with replacement.
+      - name: --bootstrap_var
+        type: boolean
+        default: false
+        description: Whether to sample variables.
+      - name: --bootstrap_var_fraction
+        type: double
+        default: 1
+        description: Fraction of the var of the sc_counts to include in each bootstrap.
+      - name: --bootstrap_var_replace
+        type: boolean
+        default: true
+        description: Whether to sample with replacement.
+  - name: Outputs
+    arguments:
+      - name: "--scores"
+        type: file
+        required: true
+        direction: output
+        description: A YAML file containing the scores of each of the methods.
+        default: stability_uns.yaml
+  - name: Arguments
+    arguments:
+      - name: "--method_ids"
+        type: string
+        multiple: true
+        description: A list of method ids to run. If not specified, all methods will be run.
+      - name: "--metric_ids"
+        type: string
+        multiple: true
+        description: A list of metric ids to run. If not specified, all metrics will be run.
+resources:
+  - type: nextflow_script
+    path: main.nf
+    entrypoint: run_wf
+  - type: file
+    path: "../../api/task_info.yaml"
+dependencies:
+  - name: process_dataset/bootstrap
+  - name: workflows/process_dataset
+  - name: workflows/run_benchmark
+repositories:
+  - name: openproblemsv2
+    type: github
+    repo: openproblems-bio/openproblems-v2
+    tag: main_build
+runners:
+  - type: executable
   - type: nextflow
     config:
       script: |
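Downstream, the per-replicate scores written to stability_uns.yaml can be summarised by looking at the spread of each metric across bootstrap replicates. The exact schema of that file is not specified in this patch, so the keys used below (`method_id`, `metric_ids`, `metric_values`) are assumptions based on the score_uns.yaml convention:

```python
# Hypothetical summary of stability across bootstrap replicates.
import yaml
import pandas as pd

with open("stability_uns.yaml") as f:
    records = yaml.safe_load(f)

df = pd.DataFrame([
    {"method_id": r["method_id"], "metric": m, "value": v}
    for r in records
    for m, v in zip(r["metric_ids"], r["metric_values"])
])
# A stable method shows a small standard deviation across replicates.
print(df.groupby(["method_id", "metric"])["value"].agg(["mean", "std"]))
```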