From 781ff5b76ba6c4c2d80dcbbec9983e147613cc71 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Cosmin=20=C8=98tefan=20Ciocan?=
 <57830279+cosminc98@users.noreply.github.com>
Date: Mon, 12 Feb 2024 17:29:26 +0100
Subject: [PATCH] Feature: Passing arguments to NVCC compiler (#26)

* Add option to give nvcc extra arguments

* Add test for nvcc options that changes c++ dialect from c++17 to c++14

* Add make and the english language pack to devcontainer to be able to build the documentation

* Update documentation config to automatically import the current version of the package

* Document new --compiler-args argument

* Improve tests coverage by testing for bad arguments and the error output during a failed compilation

* Add IPython to docs requirements to allow the __version__ import for readthedocs env

* Change devcontainer base image to have the latest CUDA toolkit

* Mock the nsight compute tool with a bash script

* Add test to compile with opencv

* Add new page to documentation that contains a new notebook that explains compiling with external libraries

* Add autodocstring vscode extension to devcontainer

* Add function that modifies the default profiler/compiler arguments to allow reusing them in multiple magic command calls

* Update pylint exceptions

* Update contributing instructions

* Change version from 1.0.3 to 1.1.0 due to adding features in a backward-compatible manner

* Install latest CUDA toolkit on the test runner to pass the OpenCV compilation test

* Install opencv in test runner and update code coverage install

* Add CUDA bin to PATH in test and coverage runners

* Add cuda bin to path variable in .bashrc

* Update way to set environment variable PATH in github action

* Change devcontainer base image back to ubuntu:22.04 to match the environment from the test runner
---
 .devcontainer/Dockerfile          |  26 +++++--
 .devcontainer/devcontainer.json   |   6 +-
 .github/workflows/test.yml        |  31 +++++---
 README.md                         |  12 +--
 docs/requirements.txt             |   1 +
 docs/source/conf.py               |  11 ++-
 docs/source/index.rst             |   1 +
 docs/source/magics.rst            |  25 ++++++-
 docs/source/notebooks.rst         |  34 +++++++++
 docs/source/usage.rst             |  44 +++++++++++
 nvcc4jupyter/__init__.py          |   3 +-
 nvcc4jupyter/parsers.py           |  50 ++++++++++++-
 nvcc4jupyter/plugin.py            |  29 ++++----
 pyproject.toml                    |   4 +-
 tests/fixtures/compiler/cpp_17.cu |  47 ++++++++++++
 tests/fixtures/compiler/opencv.cu |   8 ++
 tests/fixtures/fixtures.py        |  17 ++++-
 tests/fixtures/scripts/ncu        |   7 ++
 tests/test_plugin.py              | 119 +++++++++++++++++++++++++++++-
 19 files changed, 424 insertions(+), 51 deletions(-)
 create mode 100644 docs/source/notebooks.rst
 create mode 100644 tests/fixtures/compiler/cpp_17.cu
 create mode 100644 tests/fixtures/compiler/opencv.cu
 create mode 100755 tests/fixtures/scripts/ncu

diff --git a/.devcontainer/Dockerfile b/.devcontainer/Dockerfile
index 9088efc..f5e11b2 100644
--- a/.devcontainer/Dockerfile
+++ b/.devcontainer/Dockerfile
@@ -1,15 +1,29 @@
-FROM ubuntu
+FROM ubuntu:22.04
 
 ARG VENV_PATH=/opt/dev-venv
 ENV VENV_ACTIVATE=${VENV_PATH}/bin/activate
+ENV DEBIAN_FRONTEND="noninteractive"
 
+# install the latest CUDA toolkit (https://developer.nvidia.com/cuda-downloads)
 RUN apt update
-RUN apt install -y python3.10-venv nvidia-cuda-toolkit gcc vim git
+RUN apt install -y wget
+RUN wget https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2204/x86_64/cuda-keyring_1.1-1_all.deb
+RUN dpkg -i cuda-keyring_1.1-1_all.deb
+RUN apt update
+RUN apt -y install cuda-toolkit-12-3
+RUN echo "PATH=\"\$PATH:/usr/local/cuda/bin\"" >> ~/.bashrc
+
+# install OpenCV to test compilation with external libraries
+RUN apt install -y libopencv-dev pkg-config
 
-# the mkdir command bypasses a profiler error, which allows us to run it with
-# host code only to at least check that the profiler parameters are correctly
-# provided; without this line, some tests will fail
-RUN mkdir -p /usr/lib/x86_64-linux-gnu/nsight-compute/sections
+# make & language-pack-en are for documentation
+RUN apt install -y \
+    gcc \
+    git \
+    language-pack-en \
+    make \
+    python3.10-venv \
+    vim
 
 # we create the virtualenv here so that the devcontainer.json setting
 # python.defaultInterpreterPath can be used to find it; if we do it in the
diff --git a/.devcontainer/devcontainer.json b/.devcontainer/devcontainer.json
index c6e997c..ad02373 100644
--- a/.devcontainer/devcontainer.json
+++ b/.devcontainer/devcontainer.json
@@ -16,10 +16,12 @@
                 "ms-python.isort",
                 "ms-python.flake8",
                 "ms-python.black-formatter",
-                "ryanluker.vscode-coverage-gutters"
+                "ryanluker.vscode-coverage-gutters",
+                "njpwerner.autodocstring"
             ],
             "settings": {
-                "python.defaultInterpreterPath": "/opt/dev-venv/bin/python"
+                "python.defaultInterpreterPath": "/opt/dev-venv/bin/python",
+                "autoDocstring.docstringFormat": "google-notypes"
             }
         }
     }
diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml
index 6fd78e1..9eeb8cb 100644
--- a/.github/workflows/test.yml
+++ b/.github/workflows/test.yml
@@ -27,14 +27,19 @@ jobs:
         with:
           python-version: ${{ matrix.python-version }}
 
-      # the mkdir command bypasses a profiler error, which allows us to run it
-      # with host code only to at least check that the profiler parameters are
-      # correctly provided
-      - name: Install CUDA tools
+      - name: Install CUDA toolkit
         run: |
           sudo apt update
-          sudo apt install nvidia-cuda-toolkit
-          sudo mkdir -p /usr/lib/x86_64-linux-gnu/nsight-compute/sections
+          sudo apt install -y wget
+          wget https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2204/x86_64/cuda-keyring_1.1-1_all.deb
+          sudo dpkg -i cuda-keyring_1.1-1_all.deb
+          sudo apt update
+          sudo apt -y install cuda-toolkit-12-3
+          echo "PATH=$PATH:/usr/local/cuda/bin" >> $GITHUB_ENV
+
+      - name: Install OpenCV
+        run: |
+          sudo apt install -y libopencv-dev pkg-config
 
       - name: Install Python dependencies
         run: |
@@ -65,11 +70,19 @@ jobs:
         with:
           python-version: "3.10"
 
-      - name: Install CUDA tools
+      - name: Install CUDA toolkit
         run: |
           sudo apt update
-          sudo apt install nvidia-cuda-toolkit
-          sudo mkdir -p /usr/lib/x86_64-linux-gnu/nsight-compute/sections
+          sudo apt install -y wget
+          wget https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2204/x86_64/cuda-keyring_1.1-1_all.deb
+          sudo dpkg -i cuda-keyring_1.1-1_all.deb
+          sudo apt update
+          sudo apt -y install cuda-toolkit-12-3
+          echo "PATH=$PATH:/usr/local/cuda/bin" >> $GITHUB_ENV
+
+      - name: Install OpenCV
+        run: |
+          sudo apt install -y libopencv-dev pkg-config
 
       - name: Install Python dependencies
         run: |
diff --git a/README.md b/README.md
index ff6d5f9..cfdbee2 100644
--- a/README.md
+++ b/README.md
@@ -46,6 +46,7 @@ Here are just a few of the things that nvcc4jupyter does well:
 
   - [Easily run CUDA C++ code](https://nvcc4jupyter.readthedocs.io/en/latest/usage.html#hello-world)
   - [Profile your code with NVIDIA Nsight Compute](https://nvcc4jupyter.readthedocs.io/en/latest/usage.html#profiling)
+  - [Compile your code with external libraries (e.g. OpenCV)](https://nvcc4jupyter.readthedocs.io/en/latest/notebooks.html#compiling-with-external-libraries)
   - [Share code between different programs in the same notebook / split your code into multiple files for improved readability](https://nvcc4jupyter.readthedocs.io/en/latest/usage.html#groups)
 
 ## Install
@@ -88,13 +89,14 @@ The official documentation is hosted on [readthedocs](https://nvcc4jupyter.readt
 
 ## Contributing
 
-Install the package with the development dependencies:
-```bash
-pip install .[dev]
-```
+The recommended setup for development is using the devcontainer in GitHub
+Codespaces or locally in VSCode.
 
-As a developer, make sure you install the pre-commit hook before commiting any changes:
+If not using the devcontainer you need to install the package with the
+development dependencies and install the pre-commit hook before committing any
+changes:
 ```bash
+pip install .[dev]
 pre-commit install
 ```
 
diff --git a/docs/requirements.txt b/docs/requirements.txt
index 53fc1f3..4a750cb 100644
--- a/docs/requirements.txt
+++ b/docs/requirements.txt
@@ -1,2 +1,3 @@
 sphinx==7.1.2
 sphinx-rtd-theme==1.3.0rc1
+IPython>=8.19.0
diff --git a/docs/source/conf.py b/docs/source/conf.py
index 665059c..2e5d3b3 100644
--- a/docs/source/conf.py
+++ b/docs/source/conf.py
@@ -6,11 +6,18 @@
 # -- Project information -----------------------------------------------------
 # https://www.sphinx-doc.org/en/master/usage/configuration.html#project-information
 
+import os
+import sys
+
+sys.path.append(os.path.join("..", ".."))
+from nvcc4jupyter.__init__ import __version__  # noqa: E402
+
 project = "nvcc4jupyter"
 copyright = "2024, Andrei Nechaev & Cosmin Stefan Ciocan"
 author = "Andrei Nechaev & Cosmin Stefan Ciocan"
-release = "1.0.1"
-version = "1.0.1"
+release = __version__
+version = __version__
+
 
 # -- General configuration ---------------------------------------------------
 # https://www.sphinx-doc.org/en/master/usage/configuration.html#general-configuration
diff --git a/docs/source/index.rst b/docs/source/index.rst
index 1f07bdd..3ed1746 100644
--- a/docs/source/index.rst
+++ b/docs/source/index.rst
@@ -10,4 +10,5 @@ which provides CUDA capable GPUs with the CUDA toolkit already installed.
    :caption: Contents:
 
    usage
+   notebooks
    magics
diff --git a/docs/source/magics.rst b/docs/source/magics.rst
index 2073f35..28a3bf1 100644
--- a/docs/source/magics.rst
+++ b/docs/source/magics.rst
@@ -21,24 +21,40 @@ Usage
    - ``%%cuda``: Compile and run this cell.
    - ``%%cuda -p``: Also runs the Nsight Compute profiler.
    - ``%%cuda -p -a "<SPACE SEPARATED PROFILER ARGS>"``: Also runs the Nsight Compute profiler.
+   - ``%%cuda -c "<SPACE SEPARATED COMPILER ARGS>"``: Passes additional arguments to "nvcc".
    - ``%%cuda -t``: Outputs the "timeit" built-in magic results.
 
 Options
 -------
 
+.. _timeit:
+
 -t, --timeit
    Boolean. If set, returns the output of the "timeit" built-in
    ipython magic instead of stdout.
 
+.. _profile:
+
 -p, --profile
    Boolean. If set, runs the NVIDIA Nsight Compute profiler whose
    output is appended to standard output.
 
+.. _profiler_args:
+
 -a, --profiler-args
    String. Optional profiler arguments that can be space separated
    by wrapping them in double quotes. See all options here:
    `Nsight Compute CLI <https://docs.nvidia.com/nsight-compute/NsightComputeCli/index.html#command-line-options>`_
 
+.. _compiler_args:
+
+-c, --compiler-args
+   String. Optional compiler arguments that can be space separated
+   by wrapping them in double quotes. They will be passed to "nvcc".
+   See all options here:
+   `NVCC Options <https://docs.nvidia.com/cuda/cuda-compiler-driver-nvcc/index.html#nvcc-command-options>`_
+
+
 .. note::
    If both "\-\-profile" and "\-\-timeit" are used then no profiling is
    done.
@@ -47,10 +63,11 @@ Examples
 --------
 ::
 
-   # compile, run, and profile the code in the cell with the Nsight
-   # compute profiler while collecting only metrics from the
-   # "MemoryWorkloadAnalysis" section.
-   %%cuda --profile --profiler-args "--section MemoryWorkloadAnalysis"
+   # compile, run, and profile the code in the cell with the Nsight compute
+   # profiler while collecting only metrics from the "MemoryWorkloadAnalysis"
+   # section; also provides the "--optimize 3" option to "nvcc" during
+   # compilation to optimize host code
+   %%cuda -p -a "--section MemoryWorkloadAnalysis" -c "--optimize 3"
 
 ------
 
diff --git a/docs/source/notebooks.rst b/docs/source/notebooks.rst
new file mode 100644
index 0000000..a662ef4
--- /dev/null
+++ b/docs/source/notebooks.rst
@@ -0,0 +1,34 @@
+*********
+Notebooks
+*********
+
+This page provides a list of useful Jupyter notebooks written with the
+**nvcc4jupyter** library.
+
+.. note::
+   These notebooks are written for Google's Colab, but you may run them in
+   other environments by installing all expected dependencies. If running in
+   Colab, make sure to set the runtime type to a GPU instance (at the time of
+   writing this, T4 is the GPU offered for free by Colab).
+
+------
+
+.. _compiling_with_external_libraries:
+
+Compiling with external libraries
+=================================
+
+[`NOTEBOOK <https://colab.research.google.com/drive/1iuY46DCwv4hy3SqDhJgFeO8kgpHnzjTh?usp=sharing>`_]
+
+If you need to compile CUDA C++ code that uses external libraries in the host
+code (e.g. OpenCV for reading and writing images to disk) then this section is
+for you.
+
+To achieve this, use the :ref:`compiler-args <compiler_args>` option of the
+:ref:`cuda <cuda_magic>` magic command to pass the correct compiler options
+of the OpenCV library to **nvcc** for it to link the OpenCV code with the
+code in your Jupyter cell. Those compiler options can be provided by the
+`pkg-config <https://www.freedesktop.org/wiki/Software/pkg-config/>`_ tool.
+
+In the notebook we show how to use OpenCV to load an image, blur it with a CUDA
+kernel, and then save it back to disk using OpenCV again.
diff --git a/docs/source/usage.rst b/docs/source/usage.rst
index 38ff35c..50fe879 100644
--- a/docs/source/usage.rst
+++ b/docs/source/usage.rst
@@ -255,3 +255,47 @@ Running the cell above will compile and execute the vector addition code in the
     SM Active Cycles                cycle       383.65
     Compute (SM) Throughput             %         1.19
     ----------------------- ------------- ------------
+
+Compiler arguments
+------------------
+
+In the same way profiler arguments can be passed to the profiling tool,
+compiler arguments can be passed to **nvcc**:
+
+.. code-block:: c++
+
+    %cuda_group_run --group "vector_add" --compiler-args "--optimize 3"
+
+Running the cell above will compile and execute the vector addition code in the
+"vector_add" group. During compilation, **nvcc** receives the "\-\-optimize"
+option which specifies the optimization level for host code.
+
+Set default arguments
+---------------------
+
+In the case where you execute multiple magic commands with the same compiler or
+profiler arguments you can avoid writing them every time by setting the default
+arguments:
+
+.. code-block:: python
+
+    from nvcc4jupyter import set_defaults
+    set_defaults(compiler_args="--optimize 3", profiler_args="--section SpeedOfLight")
+
+The same effect can be achieved by running "set_defaults" once for each config
+due to the fact that the default value is not changed if a value is not
+given to the "set_defaults" function.
+
+.. code-block:: python
+
+    from nvcc4jupyter import set_defaults
+    set_defaults(compiler_args="--optimize 3")
+    set_defaults(profiler_args="--section SpeedOfLight")
+
+
+Now we can run the following cell without specifying the compiler and profiler
+arguments once again.
+
+.. code-block:: c++
+
+    %cuda_group_run --group "vector_add" --profile
diff --git a/nvcc4jupyter/__init__.py b/nvcc4jupyter/__init__.py
index 97b8902..356eb20 100644
--- a/nvcc4jupyter/__init__.py
+++ b/nvcc4jupyter/__init__.py
@@ -2,6 +2,7 @@
 nvcc4jupyter: CUDA C++ plugin for Jupyter Notebook
 """
 
+from .parsers import set_defaults  # noqa: F401
 from .plugin import NVCCPlugin, load_ipython_extension  # noqa: F401
 
-__version__ = "1.0.3"
+__version__ = "1.1.0"
diff --git a/nvcc4jupyter/parsers.py b/nvcc4jupyter/parsers.py
index e94afce..a35e49f 100644
--- a/nvcc4jupyter/parsers.py
+++ b/nvcc4jupyter/parsers.py
@@ -3,6 +3,39 @@
 """
 
 import argparse
+from typing import Callable, Optional
+
+_default_profiler_args: str = ""
+_default_compiler_args: str = ""
+
+
+def set_defaults(
+    compiler_args: Optional[str] = None, profiler_args: Optional[str] = None
+) -> None:
+    """
+    Set the default values for various arguments of the magic commands. These
+    values will be used if the user does not explicitly provide those arguments
+    to override this behaviour on a cell by cell basis.
+
+    Args:
+        compiler_args: If not None, this value becomes the new default compiler
+            config. Defaults to None, which keeps the current default.
+        profiler_args: If not None, this value becomes the new default profiler
+            config. Defaults to None, which keeps the current default.
+    """
+
+    # pylint: disable=global-statement
+    global _default_compiler_args
+    global _default_profiler_args
+    if compiler_args is not None:
+        _default_compiler_args = compiler_args
+    if profiler_args is not None:
+        _default_profiler_args = profiler_args
+
+
+def str_to_lambda(arg: str) -> Callable[[], str]:
+    """Convert argparse string to lambda"""
+    return lambda: arg
 
 
 def get_parser_cuda() -> argparse.ArgumentParser:
@@ -18,7 +51,22 @@ def get_parser_cuda() -> argparse.ArgumentParser:
     )
     parser.add_argument("-t", "--timeit", action="store_true")
     parser.add_argument("-p", "--profile", action="store_true")
-    parser.add_argument("-a", "--profiler-args", type=str, default="")
+
+    # --profiler-args and --compiler-args values are lambda functions to allow
+    # changing the default value at runtime
+    parser.add_argument(
+        "-a",
+        "--profiler-args",
+        type=str_to_lambda,
+        default=lambda: _default_profiler_args,
+    )
+    parser.add_argument(
+        "-c",
+        "--compiler-args",
+        type=str_to_lambda,
+        default=lambda: _default_compiler_args,
+    )
+
     return parser
 
 
diff --git a/nvcc4jupyter/plugin.py b/nvcc4jupyter/plugin.py
index 269a2cc..1da4f63 100644
--- a/nvcc4jupyter/plugin.py
+++ b/nvcc4jupyter/plugin.py
@@ -87,7 +87,10 @@ def _delete_group(self, group_name: str) -> None:
             shutil.rmtree(group_dirpath)
 
     def _compile(
-        self, group_name: str, executable_fname: str = DEFAULT_EXEC_FNAME
+        self,
+        group_name: str,
+        executable_fname: str = DEFAULT_EXEC_FNAME,
+        compiler_args: str = "",
     ) -> str:
         """
         Compiles all source files in a given group together with all source
@@ -97,6 +100,7 @@ def _compile(
             group_name: The name of the source file group to be compiled.
             executable_fname: The output executable file name. Defaults to
                 "cuda_exec.out".
+            compiler_args: The optional "nvcc" compiler arguments.
 
         Raises:
             RuntimeError: If the group does not exist or if does not have any
@@ -121,18 +125,12 @@ def _compile(
 
         executable_fpath = os.path.join(group_dirpath, executable_fname)
 
-        args = [
-            "nvcc",
-            "-I" + shared_dirpath + "," + group_dirpath,
-        ]
+        args = ["nvcc"]
+        args.extend(compiler_args.split())
+        args.append("-I" + shared_dirpath + "," + group_dirpath)
         args.extend(source_files)
-        args.extend(
-            [
-                "-o",
-                executable_fpath,
-                "-Wno-deprecated-gpu-targets",
-            ]
-        )
+        args.extend(["-o", executable_fpath, "-Wno-deprecated-gpu-targets"])
+
         subprocess.check_output(args, stderr=subprocess.STDOUT)
 
         return executable_fpath
@@ -188,12 +186,15 @@ def _compile_and_run(
         self, group_name: str, args: argparse.Namespace
     ) -> str:
         try:
-            exec_fpath = self._compile(group_name)
+            exec_fpath = self._compile(
+                group_name=group_name,
+                compiler_args=args.compiler_args(),
+            )
             output = self._run(
                 exec_fpath=exec_fpath,
                 timeit=args.timeit,
                 profile=args.profile,
-                profiler_args=args.profiler_args,
+                profiler_args=args.profiler_args(),
             )
         except subprocess.CalledProcessError as e:
             output = e.output.decode("utf8")
diff --git a/pyproject.toml b/pyproject.toml
index 71966ef..2bc6d1d 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -286,6 +286,6 @@ deprecated-modules="optparse,tkinter.tix"
 
 [tool.pylint.'EXCEPTIONS']
 overgeneral-exceptions= [
-    "BaseException",
-    "Exception"
+    "builtins.BaseException",
+    "builtins.Exception"
 ]
diff --git a/tests/fixtures/compiler/cpp_17.cu b/tests/fixtures/compiler/cpp_17.cu
new file mode 100644
index 0000000..6aedd6f
--- /dev/null
+++ b/tests/fixtures/compiler/cpp_17.cu
@@ -0,0 +1,47 @@
+#include <cstdlib>
+#include <iostream>
+#include <set>
+#include <string>
+#include <iterator>
+
+#include <tuple>
+
+struct S {
+    int n;
+    std::string s;
+    float d;
+    bool operator<(const S& rhs) const
+    {
+        // compares n to rhs.n,
+        // then s to rhs.s,
+        // then d to rhs.d
+        return std::tie(n, s, d) < std::tie(rhs.n, rhs.s, rhs.d);
+    }
+};
+
+int main()
+{
+    std::set<S> mySet;
+
+    // pre C++17:
+    {
+	    S value{42, "Test", 3.14};
+	    std::set<S>::iterator iter;
+	    bool inserted;
+
+	    // unpacks the return val of insert into iter and inserted
+	    std::tie(iter, inserted) = mySet.insert(value);
+
+	    if (inserted)
+		    std::cout << "Value was inserted\n";
+    }
+
+	// with C++17:
+    {
+        S value{100, "abc", 100.0};
+        const auto [iter, inserted] = mySet.insert(value);
+
+        if (inserted)
+		    std::cout << "Value(" << iter->n << ", " << iter->s << ", ...) was inserted" << "\n";
+    }
+}
diff --git a/tests/fixtures/compiler/opencv.cu b/tests/fixtures/compiler/opencv.cu
new file mode 100644
index 0000000..75380ee
--- /dev/null
+++ b/tests/fixtures/compiler/opencv.cu
@@ -0,0 +1,8 @@
+#include <opencv2/core.hpp>
+#include <iostream>
+
+int main(int argc, char** argv)
+{
+    std::cout << cv::getBuildInformation() << std::endl;
+    return 0;
+}
diff --git a/tests/fixtures/fixtures.py b/tests/fixtures/fixtures.py
index 93b88fb..ca8248d 100644
--- a/tests/fixtures/fixtures.py
+++ b/tests/fixtures/fixtures.py
@@ -27,10 +27,25 @@ def fixtures_path(tests_path):
     return os.path.join(tests_path, "fixtures")
 
 
+@pytest.fixture(scope="session")
+def scripts_path(fixtures_path: str):
+    return os.path.join(fixtures_path, "scripts")
+
+
+@pytest.fixture(scope="session")
+def compiler_cpp_17_fpath(fixtures_path: str):
+    return os.path.join(fixtures_path, "compiler", "cpp_17.cu")
+
+
+@pytest.fixture(scope="session")
+def compiler_opencv_fpath(fixtures_path: str):
+    return os.path.join(fixtures_path, "compiler", "opencv.cu")
+
+
 @pytest.fixture(scope="session")
 def sample_magic_cu_line():
     # fmt: off
-    return '--profile --profiler-args "--metrics l1tex__t_sectors_pipe_lsu_mem_global_op_ld.sum"'  # noqa: E501
+    return '--profile --profiler-args "--metrics l1tex__t_sectors_pipe_lsu_mem_global_op_ld.sum" --compiler-args "--optimize 3"'  # noqa: E501
     # fmt: on
 
 
diff --git a/tests/fixtures/scripts/ncu b/tests/fixtures/scripts/ncu
new file mode 100755
index 0000000..4d059e5
--- /dev/null
+++ b/tests/fixtures/scripts/ncu
@@ -0,0 +1,7 @@
+#!/bin/bash
+
+# this is a mock of nsight compute cli tool that just executes the program
+# given as the last argument
+"${@: -1}"
+
+echo "==WARNING== No kernels were profiled"
diff --git a/tests/test_plugin.py b/tests/test_plugin.py
index 05d340e..4c6120b 100644
--- a/tests/test_plugin.py
+++ b/tests/test_plugin.py
@@ -3,10 +3,12 @@
 import os
 import re
 import shutil
+import subprocess
 from typing import List
 
 import pytest
 
+from nvcc4jupyter.parsers import get_parser_cuda, set_defaults
 from nvcc4jupyter.plugin import NVCCPlugin
 
 
@@ -36,11 +38,19 @@ def copy_source_to_group(
     return destination_fpath
 
 
+@pytest.fixture(autouse=True, scope="session")
+def before_all(scripts_path: str):
+    os.environ["PATH"] = scripts_path + os.pathsep + os.environ["PATH"]
+
+
 @pytest.fixture(autouse=True, scope="function")
 def before_each(plugin: NVCCPlugin):
-    shutil.rmtree(plugin.workdir, ignore_errors=True)  # before test
+    # BEFORE TESTS
+    set_defaults(compiler_args="", profiler_args="")
+    shutil.rmtree(plugin.workdir, ignore_errors=True)
     yield
-    pass  # after test
+    # AFTER TESTS
+    pass
 
 
 def test_save_source(plugin: NVCCPlugin, sample_cuda_code: str) -> None:
@@ -88,6 +98,62 @@ def test_compile(
         plugin._compile(gname)
 
 
+def test_compile_args(
+    plugin: NVCCPlugin,
+    compiler_cpp_17_fpath: str,
+):
+    gname = "test_compile_args"
+    copy_source_to_group(compiler_cpp_17_fpath, gname, plugin.workdir)
+
+    exec_fpath = plugin._compile(gname, compiler_args="--std c++17")
+    assert os.path.exists(exec_fpath)
+
+    # should fail due to the source file having c++ 17 features
+    with pytest.raises(subprocess.CalledProcessError):
+        exec_fpath = plugin._compile(gname, compiler_args="--std c++14")
+
+    output = plugin._compile_and_run(
+        group_name=gname,
+        args=argparse.Namespace(
+            timeit=False,
+            profile=True,
+            profiler_args=lambda: "",
+            compiler_args=lambda: "--std c++14",
+        ),
+    )
+    assert "errors detected in the compilation of" in output
+
+
+def test_compile_opencv(
+    plugin: NVCCPlugin,
+    compiler_opencv_fpath: str,
+):
+    gname = "test_compile_opencv"
+    copy_source_to_group(compiler_opencv_fpath, gname, plugin.workdir)
+
+    # check that "pkg-config" exists
+    assert subprocess.check_call(["which", "pkg-config"]) == 0
+
+    opencv_compile_options = (
+        subprocess.check_output(
+            args=["pkg-config", "--cflags", "--libs", "opencv4"]
+        )
+        .decode()
+        .strip()
+    )
+
+    output = plugin._compile_and_run(
+        group_name=gname,
+        args=argparse.Namespace(
+            timeit=False,
+            profile=True,
+            profiler_args=lambda: "",
+            compiler_args=lambda: opencv_compile_options,
+        ),
+    )
+    assert "General configuration for OpenCV" in output
+
+
 def test_run(
     plugin: NVCCPlugin,
     sample_cuda_fpath: str,
@@ -143,7 +209,13 @@ def test_compile_and_run_multiple_files(
     for fpath in multiple_source_fpaths:
         copy_source_to_group(fpath, gname, plugin.workdir)
     output = plugin._compile_and_run(
-        gname, argparse.Namespace(timeit=False, profile=True, profiler_args="")
+        group_name=gname,
+        args=argparse.Namespace(
+            timeit=False,
+            profile=True,
+            profiler_args=lambda: "",
+            compiler_args=lambda: "",
+        ),
     )
     check_profiler_output(output)
 
@@ -165,7 +237,13 @@ def test_compile_and_run_multiple_files_shared(
         else:
             copy_source_to_group(fpath, "shared", plugin.workdir)
     output = plugin._compile_and_run(
-        gname, argparse.Namespace(timeit=False, profile=True, profiler_args="")
+        group_name=gname,
+        args=argparse.Namespace(
+            timeit=False,
+            profile=True,
+            profiler_args=lambda: "",
+            compiler_args=lambda: "",
+        ),
     )
     check_profiler_output(output)
 
@@ -181,6 +259,29 @@ def test_read_args(plugin: NVCCPlugin):
     assert math.isclose(args.b, 0.75)
 
 
+def test_set_defaults():
+    parser = get_parser_cuda()
+    args = parser.parse_args([])
+    assert args.profiler_args() == ""
+    assert args.compiler_args() == ""
+    set_defaults(profiler_args="123")
+    args = parser.parse_args([])
+    assert args.profiler_args() == "123"
+    assert args.compiler_args() == ""
+    set_defaults(compiler_args="456")
+    args = parser.parse_args([])
+    assert args.profiler_args() == "123"
+    assert args.compiler_args() == "456"
+    set_defaults(profiler_args="")
+    args = parser.parse_args([])
+    assert args.profiler_args() == ""
+    assert args.compiler_args() == "456"
+    set_defaults(profiler_args="123")
+    args = parser.parse_args(["--profiler-args", "789"])
+    assert args.profiler_args() == "789"
+    assert args.compiler_args() == "456"
+
+
 def test_magic_cuda(
     capsys,
     plugin: NVCCPlugin,
@@ -191,6 +292,16 @@ def test_magic_cuda(
     check_profiler_output(capsys.readouterr().out)
 
 
+def test_magic_cuda_bad_args(
+    capsys,
+    plugin: NVCCPlugin,
+    sample_cuda_code: str,
+):
+    plugin.cuda("--this-is-an-unrecognized-argument", sample_cuda_code)
+    output = capsys.readouterr().out
+    assert output.startswith("usage: ")
+
+
 def test_magic_cuda_group_save(plugin: NVCCPlugin, sample_cuda_code: str):
     gname = "test_save_source"
     sname = "sample.cu"