diff --git a/docs/operations/integrations/container-runtime/docker.md b/docs/operations/integrations/container-runtime/docker.md index f4389a7b..7b8d3ec1 100644 --- a/docs/operations/integrations/container-runtime/docker.md +++ b/docs/operations/integrations/container-runtime/docker.md @@ -4,6 +4,4 @@ title: Docker slug: /operations/integrations/container-runtime/docker/ --- -Documentation for setting Dragonfly's container runtime to Docker. - -Dragonfly v2.2.0 drops support for `Docker`. If you want to integrate Docker, please refer to [container-runtime-docker](../../../../versioned_docs/version-v2.1.x/operations/integrations/container-runtime/docker.md). +Documentation for setting Dragonfly's container runtime to Docker. Dragonfly v2.2.0 drops support for `Docker`. diff --git a/docs/operations/integrations/container-runtime/singularity.md b/docs/operations/integrations/container-runtime/singularity.md index 7d30db5d..e48d58a5 100644 --- a/docs/operations/integrations/container-runtime/singularity.md +++ b/docs/operations/integrations/container-runtime/singularity.md @@ -3,7 +3,4 @@ id: singularity title: Singularity/Apptainer --- -Documentation for setting Dragonfly's container runtime to Singularity/Apptainer. - -Dragonfly v2.2.0 drops support for `Singularity/Apptainer`. If you want to integrate Singularity/Apptainer, -please refer to [container-runtime-singularity/apptainer](../../../../versioned_docs/version-v2.1.x/operations/integrations/container-runtime/singularity.md). +Documentation for setting Dragonfly's container runtime to Singularity/Apptainer. Dragonfly v2.2.0 drops support for `Singularity/Apptainer`. diff --git a/docs/roadmap/v2.2.md b/docs/roadmap/v2.2.md index 0ac3c10a..aca040c6 100644 --- a/docs/roadmap/v2.2.md +++ b/docs/roadmap/v2.2.md @@ -6,24 +6,16 @@ slug: /roadmap-v2.2/ Manager: -- Peer features are configurable. For example, you can make the peer can not be uploaded and can only be downloaded. -- Configure the weight of the scheduling. 
- Add clearing P2P task cache. -- Display P2P traffic distribution. - Peer information display, including CPU, Memory, etc. Scheduler: -- Provide metadata storage to support file writing and seeding. - Optimize scheduling algorithm and improve bandwidth utilization in the P2P network. Client: - Client written in Rust, reduce CPU usage and Memory usage. -- Supports RDMA for faster network transmission in the P2P network. - It can better support the loading of AI inference models into memory. -- Supports file writing and seeding, it can be accessed in the P2P cluster without uploading to other storage. - Helps AI models and AI datasets to be read and written faster in the P2P network. Others: diff --git a/docs/roadmap/v2.3.md b/docs/roadmap/v2.3.md new file mode 100644 index 00000000..30e9e9a7 --- /dev/null +++ b/docs/roadmap/v2.3.md @@ -0,0 +1,43 @@ +--- +id: roadmap-v2.3 +title: v2.3 +slug: /roadmap-v2.3/ +--- + +Manager: + +- Configure scheduling weights. +- Support scopes for Personal Access Tokens (PATs). +- Regularly clean up inactive schedulers and seed peers. +- Display more Peer information in the console, such as CPU and memory usage. +- Display persistent cache information of peers in the console. +- Add management of sync peers in the console. + +Scheduler: + +- Optimize the scheduling algorithm to improve bandwidth utilization in the P2P network. + +Client: + +- Support RDMA/QUIC for faster network transmission in the P2P network, enhancing the loading of + AI inference models into memory. +- Define a codable protocol for data transmission, providing faster encoding/decoding. +- Support persistent cache, allowing access within the P2P cluster without uploading to other storage, + facilitating faster read/write of AI models and datasets. +- Allow peers to get the QoS of parents and select the optimal parents for downloading. +- Preheat files in the memory cache to improve download speed. + +Others: + +- Add more performance tests in the dfbench command. 
+- Add more E2E tests and unit tests. + +Documentation: + +- Restructure the documentation to make it easier for users to navigate. +- Enhance the landing page UI. + +AI Infrastructure: + +- Optimize large file distribution within the infrastructure. +- Optimize handling of a large number of small I/Os for Nydus. diff --git a/docs/roadmap/v2.4.md b/docs/roadmap/v2.4.md new file mode 100644 index 00000000..c66a18d3 --- /dev/null +++ b/docs/roadmap/v2.4.md @@ -0,0 +1,36 @@ +--- +id: roadmap-v2.4 +title: v2.4 +slug: /roadmap-v2.4/ +--- + +Manager + +- Optimize memory and CPU usage. +- Add more features to the console. +- Provide more open APIs for the console. + +Scheduler + +- Optimize the scheduling algorithm to improve bandwidth utilization in the P2P network. + +Client + +- Support P2P for RDMA-based memory storage. +- Add distributed addressing, allowing deployment without relying on the manager and scheduler. +- Optimize file transfer speed in the P2P network. + +Others + +- Add more performance tests in the `dfbench` command. +- Add more E2E tests and unit tests. + +Documentation + +- Restructure the documentation to make it easier for users to navigate. +- Enhance the landing page UI. + +AI Infrastructure + +- Optimize large file distribution within the infrastructure. +- Optimize handling of a large number of small I/Os for Nydus. 
diff --git a/package.json b/package.json index 2f727e55..f9e4092e 100644 --- a/package.json +++ b/package.json @@ -1,6 +1,6 @@ { "name": "d7y-io", - "version": "0.1.2", + "version": "2.2.0", "private": true, "scripts": { "docusaurus": "docusaurus", diff --git a/versioned_docs/version-v2.2.0/FAQ.md b/versioned_docs/version-v2.2.0/FAQ.md new file mode 100644 index 00000000..25b4d031 --- /dev/null +++ b/versioned_docs/version-v2.2.0/FAQ.md @@ -0,0 +1,41 @@ +--- +id: faq +title: FAQ +slug: /faq/ +--- + +## Change log level {#change-log-level} + +Send `SIGUSR1` signal to dragonfly process to change log level + +```shell +kill -s SIGUSR1 +``` + +stdout: + +```text +change log level to debug +change log level to fatal +change log level to panic +change log level to dpanic +change log level to error +change log level to warn +change log level to info +``` + +> The change log level event will print in stdout and `core.log` file, but if the level is greater than `info`, stdout only. + +## 500 Internal Server Error {#500-internal-server-error} + +**1.** Check error logs in /var/log/dragonfly/dfdaemon/ + +**2.** Check source connectivity(dns error or certificate error) + +Example: + +```shell +curl https://example.harbor.local/ +``` + +When curl says error, please check the details in output. diff --git a/versioned_docs/version-v2.2.0/advanced-guides/personal-access-tokens.md b/versioned_docs/version-v2.2.0/advanced-guides/personal-access-tokens.md new file mode 100644 index 00000000..d0f850eb --- /dev/null +++ b/versioned_docs/version-v2.2.0/advanced-guides/personal-access-tokens.md @@ -0,0 +1,65 @@ +--- +id: personal-access-tokens +title: Personal Access Tokens +slug: /advanced-guides/personal-access-tokens/ +--- + +You can use a personal access token to call open API. + +In this article, you will learn how to create, use, modify and delete personal access token. + +## About personal access tokens + +Only users with `root` role can list all personal access tokens. 
+ +![tokens](../resource/advanced-guides/personal-access-tokens/tokens.png) + +## Create personal access token + +Click the `ADD PERSONAL ACCESS TOKENS` button to create personal access token. + +**Name**: Set your token a descriptive name. + +**Description**: Set a description. + +**Expiration**: Set your token an expiration. + +**Scopes**: Select the access permissions for the token. + +![create-token](../resource/advanced-guides/personal-access-tokens/create-token.png) + +Click `SAVE` and copy the token and store it. For your security, it doesn't display again. + +![copy-token](../resource/advanced-guides/personal-access-tokens/copy-token.png) + +## Update personal access token + +Click `personal access token name` and update your personal access token. + +![update-token](../resource/advanced-guides/personal-access-tokens/update-token.png) + +## Delete personal access token + +Click `DELETE` and delete your personal access token. + +![delete-token](../resource/advanced-guides/personal-access-tokens/delete-token.png) + +## Use personal access token + +**Step 1:** Open Postman, and import [postman_collection.json](https://github.com/gaius-qi/dragonfly-docs/blob/main/manager/postman/Dragonfly.postman_collection.json). + +**Step 2:** Click **Open API** in the sidebar. + +**Step 3:** Click **Authorization** and select **Bearer Token**, paste `personal access token` in `Token`. + +![add-token-to-open-api](../resource/advanced-guides/personal-access-tokens/add-token-to-open-api.png) + +**Step 4:** Click **Headers**, check whether `Authorization` is added to Headers. + +![verify-headers](../resource/advanced-guides/personal-access-tokens/verify-headers.png) + +**Step 5:** Click **Send** button to initiate a request. + +**Step 6:** If successful, it means that the call to the open API is completed through the personal access token. 
+ +![verify-open-api](../resource/advanced-guides/personal-access-tokens/verify-request.png) diff --git a/versioned_docs/version-v2.2.0/advanced-guides/preheat.md b/versioned_docs/version-v2.2.0/advanced-guides/preheat.md new file mode 100644 index 00000000..c82b52f1 --- /dev/null +++ b/versioned_docs/version-v2.2.0/advanced-guides/preheat.md @@ -0,0 +1,397 @@ +--- +id: preheat +title: Preheat +slug: /advanced-guides/preheat/ +--- + +This document will help you experience how to use Dragonfly's three preheat methods, +namely Open API preheat, console preheat and harbor preheat. + +## Open API + +Use Open API to preheat. + +### Create personal access token + +Click the `ADD PERSONAL ACCESS TOKENS` button to create personal access token. + +**Name**: Set your token a descriptive name. + +**Description**: Set a description. + +**Expiration**: Set your token an expiration. + +**Scopes**: Select the access permissions for the token. + +![create-token](../resource/advanced-guides/personal-access-tokens/create-token.png) + +Click `SAVE` and copy the token and store it. For your security, it doesn't display again. + +![copy-token](../resource/advanced-guides/personal-access-tokens/copy-token.png) + +### Preheat image + +Use Open API for preheating image. First create a POST request for preheating. + +**scope:** Select the scope of preheat as needed. + +- **Single Seed Peer**: Preheat to a seed peer. + +- **All Seed Peers**: Preheat to each seed peer in the P2P cluster. + +- **All Peers**: Preheat to each peer in the P2P cluster. + +**scheduler_cluster_ids:** Specify the preheated scheduler cluster id, +if `scheduler_cluster_ids` is empty, it means preheating all scheduler clusters. 
+ +```bash +curl --location --request POST 'http://dragonfly-manager:8080/oapi/v1/jobs' \ +--header 'Content-Type: application/json' \ +--header 'Authorization: Bearer your_dragonfly_personal_access_token' \ +--data-raw '{ + "type": "preheat", + "args": { + "type": "image", + "url": "https://index.docker.io/v2/library/alpine/manifests/3.19", + "username": "your_registry_username", + "password": "your_registry_password", + "scope": "single_seed_peer", + "scheduler_cluster_ids":[1] + } +}' +``` + +The command-line log returns the preheat job id. + +```bash +{ + "id": 1, + "created_at": "0001-01-01T00:00:00Z", + "updated_at": "0001-01-01T00:00:00Z", + "task_id": "group_9523f30a-877d-41f7-a25f-0854228341f6", + "bio": "", + "type": "preheat", + "state": "PENDING", + "args": { + "platform": "", + "scope": "single_seed_peer", + "tag": "", + "type": "image", + "url": "https://dockerpull.org/v2/library/alpine/manifests/3.19" + }, + "result": null, + "scheduler_clusters": [ + { + "id": 1, + "created_at": "2024-12-11T07:57:44Z", + "updated_at": "2024-12-11T07:57:44Z", + "name": "cluster-1" + } + ] +} +``` + +Polling the preheating status with job id. + +```bash +curl --request GET 'http://dragonfly-manager:8080/oapi/v1/jobs/1' \ +--header 'Content-Type: application/json' \ +--header 'Authorization: Bearer your_dragonfly_personal_access_token' +``` + +If the status is `SUCCESS`, the preheating is successful. 
+ +```bash +{ + "id": 1, + "created_at": "0001-01-01T00:00:00Z", + "updated_at": "0001-01-01T00:00:00Z", + "task_id": "group_9523f30a-877d-41f7-a25f-0854228341f6", + "bio": "", + "type": "preheat", + "state": "SUCCESS", + "args": { + "platform": "", + "scope": "single_seed_peer", + "tag": "", + "type": "image", + "url": "https://dockerpull.org/v2/library/alpine/manifests/3.19" + }, + "result": null, + "scheduler_clusters": [ + { + "id": 1, + "created_at": "2024-12-11T07:57:44Z", + "updated_at": "2024-12-11T07:57:44Z", + "name": "cluster-1" + } + ] +} +``` + +### Preheat file + +Use Open API for preheating file. First create a POST request for preheating. + +**scope:** Select the scope of preheat as needed. + +- **Single Seed Peer**: Preheat to a seed peer. + +- **All Seed Peers**: Preheat to each seed peer in the P2P cluster. + +- **All Peers**: Preheat to each peer in the P2P cluster. + +**scheduler_cluster_ids:** Specify the preheated scheduler cluster id, +if `scheduler_cluster_ids` is empty, it means preheating all scheduler clusters. + +```bash +curl --location --request POST 'http://dragonfly-manager:8080/oapi/v1/jobs' \ +--header 'Content-Type: application/json' \ +--header 'Authorization: Bearer your_dragonfly_personal_access_token' \ +--header 'Authorization: token your_example.com_personal_access_token' \ +--data-raw '{ + "type": "preheat", + "args": { + "type": "file", + "url": "https://example.com", + "scope": "single_seed_peer", + "scheduler_cluster_ids":[1] + } +}' +``` + +The command-line log returns the preheat job id. 
+ +```bash +{ + "id": 1, + "created_at": "2024-12-11T08:30:12Z", + "updated_at": "2024-12-11T08:30:51Z", + "task_id": "group_4dd1da54-96ca-48ff-8f20-4fae665f677f", + "bio": "", + "type": "preheat", + "state": "PENDING", + "args": { + "platform": "", + "scope": "single_seed_peer", + "type": "file", + "url": "https://example.com" + }, + "scheduler_clusters": [ + { + "id": 1, + "created_at": "2024-12-11T07:57:44Z", + "updated_at": "2024-12-11T07:57:44Z", + "name": "cluster-1" + } + ] +} +``` + +Polling the preheating status with job id. + +```bash +curl --request GET 'http://dragonfly-manager:8080/oapi/v1/jobs/1' \ +--header 'Content-Type: application/json' \ +--header 'Authorization: Bearer your_dragonfly_personal_access_token' +``` + +If the status is `SUCCESS`, the preheating is successful. + +```bash +{ + "id": 1, + "created_at": "2024-12-11T08:30:12Z", + "updated_at": "2024-12-11T08:30:51Z", + "task_id": "group_4dd1da54-96ca-48ff-8f20-4fae665f677f", + "bio": "", + "type": "preheat", + "state": "SUCCESS", + "args": { + "platform": "", + "scope": "single_seed_peer", + "type": "file", + "url": "https://example.com" + }, + "scheduler_clusters": [ + { + "id": 1, + "created_at": "2024-12-11T07:57:44Z", + "updated_at": "2024-12-11T07:57:44Z", + "name": "cluster-1" + } + ] +} +``` + +## Console + +Use console for preheating image. + +### Preheat + +Display all of the preheat tasks. + +![preheats](../resource/advanced-guides/preheat/preheats.png) + +### Create Preheat + +Click the `ADD PREHEAT` button to create preheat task. + +**Description**: Set a description. + +**Clusters**: Used for clusters that need to be preheat. + +**URL**: URL address used to specify the resource to be preheat. + +**Scope:** Select the scope of preheat as needed. + +- **Single Seed Peer**: Preheat to a seed peer. + +- **All Seed Peers**: Preheat to each seed peer in the P2P cluster. + +- **All Peers**: Preheat to each peer in the P2P cluster. 
**Tag**: When the URL of the preheat task is the same but the Tag is different, they will be distinguished based on the +tag and the generated preheat task will be different. + +**Filtered Query Params**: By setting the filteredQueryParams parameter, you can specify +the file type of the resource that needs to be preheated. +The filteredQueryParams is used to generate a unique preheat task and filter unnecessary query parameters in the URL. + +![create-preheat](../resource/advanced-guides/preheat/create-preheat.png) + +Click the `SAVE` to generate the preheat task, the generated preheat task will not return results immediately and +you need to wait. + +![pending-preheat](../resource/advanced-guides/preheat/pending-preheat.png) + +### Preheat Success + +If the status is `SUCCESS`, the preheating is successful. + +![success-preheat](../resource/advanced-guides/preheat/success-preheat.png) + +### Preheat Failure + +If the status is `FAILURE`, the preheating has failed and an error log is displayed. + +![failure-preheat](../resource/advanced-guides/preheat/failure-preheat.png) + +## Harbor + +Use harbor for preheating image, please refer to the +[harbor](https://goharbor.io/docs/2.11.0/administration/p2p-preheat/) documentation for details. + +### Configure self-signed certificates for registry + +> Notice: If harbor is not configured with self-signed certificates, please ignore the following. + +To support preheating for harbor with self-signed certificates, +the Manager configuration needs to be modified. + +Configure `manager.yaml`, the default path is `/etc/dragonfly/manager.yaml`, +refer to [manager config](../reference/configuration/manager.md). + +> Notice: `yourdomain.crt` is Harbor's ca.crt. + +```shell +job: + # Preheat configuration. + preheat: + tls: + # insecureSkipVerify controls whether a client verifies the server's certificate chain and hostname. 
insecureSkipVerify: false + # # caCert is the CA certificate for preheat tls handshake, it can be path or PEM format string. + caCert: /etc/certs/yourdomain.crt +``` + +Skip TLS verification, set `job.preheat.tls.insecureSkipVerify` to true. + +```shell +job: + # Preheat configuration. + preheat: + tls: + # insecureSkipVerify controls whether a client verifies the server's certificate chain and hostname. + insecureSkipVerify: true + # # caCert is the CA certificate for preheat tls handshake, it can be path or PEM format string. + # caCert: '' +``` + +### Create personal access token {#harbor-create-personal-access-token} + +Click the `ADD PERSONAL ACCESS TOKENS` button to create personal access token. + +**Name**: Set your token a descriptive name. + +**Description**: Set a description. + +**Expiration**: Set your token an expiration. + +**Scopes**: Select the access permissions for the token. + +![create-token](../resource/advanced-guides/preheat/create-token.png) + +Click `SAVE` and copy the token and store it. For your security, it doesn't display again. + +![copy-token](../resource/advanced-guides/personal-access-tokens/copy-token.png) + +### Create instance + +Open the harbor UI, go to `Distributions` item under `Administration` +and click the `NEW INSTANCE` button to create an instance. + +**Step 1:** Enter REST address of the Dragonfly Manager. + +**Step 2:** Auth Mode selects OAuth for authentication. + +**Step 3:** Enter personal access token. + +![create-instance](../resource/advanced-guides/preheat/create-instance.png) + +Click the `TEST CONNECTION` button to test the connectivity of the creating instance. +If the connectivity testing is successful, click the `OK` button to save the creating instance. + +> Notice: Instance status must be `Healthy`. 
+ +![instance](../resource/advanced-guides/preheat/instance.png) + +### Create P2P provider policy + +Go to `Projects` and open your project from the project list, and open the `P2P Preheat` tab. + +![p2p-preheat](../resource/advanced-guides/preheat/p2p-preheat.png) + +Click the `NEW POLICY` button to create P2P provider policy. + +**Scope:** Select the scope of preheat as needed. + +- **Single Seed Peer**: Preheat to a seed peer. + +- **All Seed Peers**: Preheat to each seed peer in the P2P cluster. + +- **All Peers**: Preheat to each peer in the P2P cluster. + +**Step 2:** Enter the cluster id of Dragonfly Manager in `Cluster IDs` to specify the preheated cluster. +If `Cluster IDs` is empty, it means to preheat all clusters. + +![create-policy](../resource/advanced-guides/preheat/create-policy.png) + +### Executions Preheat policy + +Click the `EXECUTE` to execute the preheating task. + +![exectu-preheat](../resource/advanced-guides/preheat/exectu-preheat.png) + +If the status is SUCCESS, the preheating is successful. + +![executions](../resource/advanced-guides/preheat/executions.png) + +Click the executions `ID` to view the detailed information of the preheating task, and click the Logs icon to view the log. + +![executions-success](../resource/advanced-guides/preheat/executions-success.png) + +The expected output is as follows. + +![log](../resource/advanced-guides/preheat/log.png) diff --git a/versioned_docs/version-v2.2.0/advanced-guides/task.md b/versioned_docs/version-v2.2.0/advanced-guides/task.md new file mode 100644 index 00000000..e6e5eeac --- /dev/null +++ b/versioned_docs/version-v2.2.0/advanced-guides/task.md @@ -0,0 +1,534 @@ +--- +id: task-manager +title: Task Manager +slug: /advanced-guides/task-manager/ +--- + +This document will help you experience how to use Dragonfly's two methods of finding and clearing P2P task caches, +namely Open API and console. + +## Open API + +Use Open API to find and clear the P2P task cache. 
+ +### Create personal access token + +Click the `ADD PERSONAL ACCESS TOKENS` button to create personal access token. + +**Name**: Set your token a descriptive name. + +**Description**: Set a description. + +**Expiration**: Set your token an expiration. + +**Scopes**: Select the access permissions for the token. + +![create-token](../resource/advanced-guides/task/create-token.png) + +Click `SAVE` and copy the token and store it. For your security, it doesn't display again. + +![copy-token](../resource/advanced-guides/personal-access-tokens/copy-token.png) + +### Search task {#open-api-search-task} + +#### Search by URL {#open-api-search-by-url} + +Use Open API for find task. First create a POST request for find task. + +```shell +curl --location --request POST 'http://dragonfly-manager:8080/oapi/v1/jobs' \ +--header 'Content-Type: application/json' \ +--header 'Authorization: Bearer your_dragonfly_personal_access_token' \ +--data-raw '{ + "type": "get_task", + "args": { + "url": "https://example.com", + "tag": "your_url_tag", + "application": "your_url_application" + } +}' +``` + +The command-line log returns the find task job id. + +```shell +{ + "id": 1, + "created_at": "0001-01-01T00:00:00Z", + "updated_at": "0001-01-01T00:00:00Z", + "task_id": "group_b58cdd29-aaae-498c-beab-a24e5d325366", + "bio": "", + "type": "get_task", + "state": "PENDING", + "args": { + "application": "", + "filtered_query_params": "", + "tag": "", + "task_id": "", + "url": "https://example.com" + }, + "result": null, + "seed_peer_clusters": null, + "scheduler_clusters": [ + { + "id": 1, + "created_at": "2024-11-15T08:06:37Z", + "updated_at": "2024-11-15T08:06:37Z", + "name": "cluster-1", + }, + ] +} +``` + +Polling the task status with job id. 
+ +```shell +curl --request GET 'http://dragonfly-manager:8080/oapi/v1/jobs/1' \ +--header 'Content-Type: application/json' \ +--header 'Authorization: Bearer your_dragonfly_personal_access_token' +``` + +If status is `SUCCESS`, it means that the find task is successful. +`result.job_states.results.peers` is the address of the task cache. + +```shell +{ + "id": 1, + "created_at": "2024-11-19T08:08:23Z", + "updated_at": "2024-11-19T08:08:53Z", + "type": "get_task", + "state": "SUCCESS", + "args": { + "application": "", + "filtered_query_params": "", + "tag": "", + "task_id": "", + "url": "https://example.com" + }, + "result": { + "created_at": "2024-11-19T08:08:23.087253883Z", + "job_states": [ + { + "created_at": "2024-11-19T08:08:23.087253883Z", + "error": "", + "results": [ + { + "peers": [ + { + "created_at": "2024-11-19T08:13:35.210473555Z", + "host_type": "super", + "hostname": "dragonfly-seed-client-0", + "id": "10.244.1.32-dragonfly-seed-client-0-9783271e-e27c-4b16-a7c9-6a8ffe18cd1a-seed", + "ip": "10.244.1.32", + "updated_at": "2024-11-19T08:13:35.364939639Z" + } + ], + "scheduler_cluster_id": 1 + } + ], + "state": "SUCCESS", + "task_name": "get_task", + "task_uuid": "task_ddc9db9d-cb7f-4abd-b1bc-b4c28f259dcb", + "ttl": 0 + }, + ], + "state": "SUCCESS", + "updated_at": "2024-11-19T08:08:53.157878758Z" + }, + "seed_peer_clusters": [], + "scheduler_clusters": [ + { + "id": 1, + "created_at": "2024-11-15T08:06:37Z", + "updated_at": "2024-11-15T08:06:37Z", + "name": "cluster-1", + } + ] +} +``` + +#### Search by task id {#open-api-search-by-task-id} + +Use Open API for find task. First create a POST request for find task. 
+ +```shell +curl --location --request POST 'http://dragonfly-manager:8080/oapi/v1/jobs' \ +--header 'Content-Type: application/json' \ +--header 'Authorization: Bearer your_dragonfly_personal_access_token' \ +--data-raw '{ + "type": "get_task", + "args": { + "task_id": "your_task_id" + } +}' +``` + +The command-line log returns the find task job id. + +```shell +{ + "id": 1, + "created_at": "0001-01-01T00:00:00Z", + "updated_at": "0001-01-01T00:00:00Z", + "task_id": "group_b58cdd29-aaae-498c-beab-a24e5d325366", + "bio": "", + "type": "get_task", + "state": "PENDING", + "args": { + "task_id": "your_task_id", + }, + "result": null, + "seed_peer_clusters": null, + "scheduler_clusters": [ + { + "id": 1, + "created_at": "2024-11-15T08:06:37Z", + "updated_at": "2024-11-15T08:06:37Z", + + "name": "cluster-1", + }, + ] +} +``` + +Polling the task status with job id. + +```shell +curl --request GET 'http://dragonfly-manager:8080/oapi/v1/jobs/1' \ +--header 'Content-Type: application/json' \ +--header 'Authorization: Bearer your_dragonfly_personal_access_token' +``` + +If status is `SUCCESS`, it means that the find task is successful. +`result.job_states.results.peers` is the address of the task cache. 
+ +```shell +{ + "id": 1, + "created_at": "2024-11-19T08:08:23Z", + "updated_at": "2024-11-19T08:08:53Z", + "type": "get_task", + "state": "SUCCESS", + "args": { + "task_id": "your_task_id", + }, + "result": { + "created_at": "2024-11-19T08:08:23.087253883Z", + "job_states": [ + { + "created_at": "2024-11-19T08:08:23.087253883Z", + "results": [ + { + "peers": [ + { + "created_at": "2024-11-19T08:13:35.210473555Z", + "host_type": "super", + "hostname": "dragonfly-seed-client-0", + "id": "10.244.1.32-dragonfly-seed-client-0-9783271e-e27c-4b16-a7c9-6a8ffe18cd1a-seed", + "ip": "10.244.1.32", + "updated_at": "2024-11-19T08:13:35.364939639Z" + } + ], + "scheduler_cluster_id": 1 + } + ], + "state": "SUCCESS", + "task_name": "get_task", + "task_uuid": "task_ddc9db9d-cb7f-4abd-b1bc-b4c28f259dcb", + "ttl": 0 + }, + ], + "state": "SUCCESS", + "updated_at": "2024-11-19T08:08:53.157878758Z" + }, + "scheduler_clusters": [ + { + "id": 1, + "created_at": "2024-11-15T08:06:37Z", + "updated_at": "2024-11-15T08:06:37Z", + "name": "cluster-1", + } + ] +} +``` + +### Delete task {#open-api-delete-task} + +#### Delete by URL + +Use Open API for delete task. First create a POST request for delete task. + +If `scheduler_cluster_ids` does not exist, it means deleting all caches of the task. +`scheduler_cluster_ids` is the `result.job_states.results.scheduler_cluster_id` of the search task. + +```shell +curl --location --request POST 'http://dragonfly-manager:8080/oapi/v1/jobs' \ +--header 'Content-Type: application/json' \ +--header 'Authorization: Bearer your_dragonfly_personal_access_token' \ +--data-raw '{ + "type": "delete_task", + "args": { + "url": "https://example.com", + "tag": "your_url_tag", + "application": "your_url_application" + }, + "scheduler_cluster_ids":[your_scheduler_cluster_id] +}' +``` + +The command line log returns the deletion task job id. 
+ +```shell +{ + "id": 2, + "created_at": "0001-01-01T00:00:00Z", + "updated_at": "0001-01-01T00:00:00Z" + "task_id": "group_9da544b6-ac3d-4434-9c80-56b137c7e2be", + "bio": "", + "type": "delete_task", + "state": "PENDING", + "args": { + "application": "", + "filtered_query_params": "", + "tag": "", + "url": "https://example.com" + }, + "result": null, + }, + "scheduler_clusters": [ + { + "id": 1, + "created_at": "2024-11-15T08:06:37Z", + "updated_at": "2024-11-15T08:06:37Z", + "name": "cluster-1", + }, + ] +} +``` + +Polling the delete task status with job id. + +```shell +curl --request GET 'http://dragonfly-manager:8080/oapi/v1/jobs/2' \ +--header 'Content-Type: application/json' \ +--header 'Authorization: Bearer your_dragonfly_personal_access_token' +``` + +If the status is SUCCESS and failure_tasks is empty, it means that the deletion task is successful. + +```shell +{ + "id": 2, + "created_at": "2024-11-19T07:51:46Z", + "updated_at": "2024-11-19T07:52:45Z" + "task_id": "group_909c09ee-4d4f-4033-be95-08dd800330bd", + "bio": "", + "type": "delete_task", + "state": "SUCCESS", + "args": { + "application": "", + "filtered_query_params": "", + "tag": "", + "task_id": "", + "url": "https://example.com" + }, + "result": { + "created_at": "2024-11-19T07:51:46.33966588Z", + "job_states": [ + { + "created_at": "2024-11-19T07:51:46.33966588Z", + "error": "", + "results": [ + { + "failure_tasks": [], + "scheduler_cluster_id": 1, + "success_tasks": [ + { + "host_type": "super", + "hostname": "dragonfly-seed-client-0", + "ip": "10.244.1.32" + } + ] + } + ], + "state": "SUCCESS", + "task_name": "delete_task", + }, + ], + "state": "SUCCESS", + "updated_at": "2024-11-19T07:52:44.646613879Z" + }, + "scheduler_clusters": [ + { + "id": 1, + "created_at": "2024-11-15T08:06:37Z", + "updated_at": "2024-11-15T08:06:37Z", + "name": "cluster-1", + } + ] +} +``` + +#### Delete by task id + +Use Open API for delete task. First create a POST request for delete task. 
+ +If `scheduler_cluster_ids` does not exist, it means deleting all caches of the task. +`scheduler_cluster_ids` is the `result.job_states.results.scheduler_cluster_id` of the search task. + +```shell +curl --location --request POST 'http://dragonfly-manager:8080/oapi/v1/jobs' \ +--header 'Content-Type: application/json' \ +--header 'Authorization: Bearer your_dragonfly_personal_access_token' \ +--data-raw '{ + "type": "delete_task", + "args": { + "task_id": "your_task_id" + }, + "scheduler_cluster_ids":[your_scheduler_cluster_id] +}' +``` + +The command line log returns the deletion task job id. + +```shell +{ + "id": 2, + "created_at": "0001-01-01T00:00:00Z", + "updated_at": "0001-01-01T00:00:00Z" + "task_id": "group_9da544b6-ac3d-4434-9c80-56b137c7e2be", + "bio": "", + "type": "delete_task", + "state": "PENDING", + "args": { + "task_id": "your_task_id", + }, + "result": null, + }, + "scheduler_clusters": [ + { + "id": 1, + "created_at": "2024-11-15T08:06:37Z", + "updated_at": "2024-11-15T08:06:37Z", + "name": "cluster-1", + }, + ] +} +``` + +Polling the delete task status with job id. + +```shell +curl --request GET 'http://dragonfly-manager:8080/oapi/v1/jobs/2' \ +--header 'Content-Type: application/json' \ +--header 'Authorization: Bearer your_dragonfly_personal_access_token' +``` + +If the status is SUCCESS and failure_tasks is empty, it means that the deletion task is successful. 
+ +```shell +{ + "id": 2, + "created_at": "2024-11-19T07:51:46Z", + "updated_at": "2024-11-19T07:52:45Z" + "task_id": "group_909c09ee-4d4f-4033-be95-08dd800330bd", + "bio": "", + "type": "delete_task", + "state": "SUCCESS", + "args": { + "task_id": "your_task_id", + }, + "result": { + "created_at": "2024-11-19T07:51:46.33966588Z", + "job_states": [ + { + "created_at": "2024-11-19T07:51:46.33966588Z", + "error": "", + "results": [ + { + "failure_tasks": [], + "scheduler_cluster_id": 1, + "success_tasks": [ + { + "host_type": "super", + "hostname": "dragonfly-seed-client-0", + "ip": "10.244.1.32" + } + ] + } + ], + "state": "SUCCESS", + "task_name": "delete_task", + }, + ], + "state": "SUCCESS", + "updated_at": "2024-11-19T07:52:44.646613879Z" + }, + "scheduler_clusters": [ + { + "id": 1, + "created_at": "2024-11-15T08:06:37Z", + "updated_at": "2024-11-15T08:06:37Z", + "name": "cluster-1", + } + ] +} +``` + +## Console + +Use console to find and clear the P2P task cache. + +### Search task + +#### Search by URL + +**URL**: Query the task cache based on the URL. + +**Tag**: When the task URL is the same but the tags are different, +they will be distinguished based on the tags, and the queried tasks will also be different. + +**Application**: Caller application which is used for statistics and access control. + +**Filter Query Params**: Filter the query parameters of the downloaded URL. +If the download URL is the same, it will be scheduled as the same task. + +![search-task-by-url](../resource/advanced-guides/task/search-task-by-url.png) + +#### Search by task id + +**Task ID**: Query the task cache based on the task id. + +![search-task-by-task-id](../resource/advanced-guides/task/search-task-by-task-id.png) + +### Delete task + +Click `DELETE` and delete task. + +![delete-task](../resource/advanced-guides/task/delete-task.png) + +The deleted task will not return results immediately and you need to wait. 
+ +![pending-task](../resource/advanced-guides/task/pending-task.png) + +### Executions + +Displays all deleted task. + +![executions](../resource/advanced-guides/task/executions.png) + +#### Delete successfully + +If the status is SUCCESS and the Failure list does not exist, it means that the deletion task is successful. + +![success-task](../resource/advanced-guides/task/success-task.png) + +#### Delete failed + +The Failure list will show the tasks that failed to execute. + +![failure-task](../resource/advanced-guides/task/failure-task.png) + +Click the `Description` icon to view the failure log. + +![error-log](../resource/advanced-guides/task/error-log.png) diff --git a/versioned_docs/version-v2.2.0/advanced-guides/web-console.md b/versioned_docs/version-v2.2.0/advanced-guides/web-console.md new file mode 100644 index 00000000..96875238 --- /dev/null +++ b/versioned_docs/version-v2.2.0/advanced-guides/web-console.md @@ -0,0 +1,15 @@ +--- +id: web-console +title: Web Console +slug: /advanced-guides/web-console/ +--- + +Console page: + +- [Sign in](./web-console/signin.md) +- [Sign up](./web-console/signup.md) +- [Cluster](./web-console/cluster.md) +- [Personal Access Tokens](./web-console/developer/personal-access-tokens.md) +- [Preheat](./web-console/job/preheat.md) +- [Task](./web-console/job/task.md) +- [User](./web-console/user.md) diff --git a/versioned_docs/version-v2.2.0/advanced-guides/web-console/cluster.md b/versioned_docs/version-v2.2.0/advanced-guides/web-console/cluster.md new file mode 100644 index 00000000..aed0d2c8 --- /dev/null +++ b/versioned_docs/version-v2.2.0/advanced-guides/web-console/cluster.md @@ -0,0 +1,151 @@ +--- +id: cluster +title: Cluster +slug: /advanced-guides/web-console/cluster/ +--- + +In this article, you will be shown Cluster page information. + +## Clusters + +When you sign in successfully, you will come to the clusters list page, which will display all of the clusters information. 
+
+![clusters](../../resource/advanced-guides/web-console/cluster/clusters.png)
+
+## Cluster
+
+Display the cluster details, each attribute has prompt information on the button `?`.
+
+![cluster](../../resource/advanced-guides/web-console/cluster/cluster.png)
+
+## Create Cluster
+
+Create a cluster, and at the same time create a set of scheduler cluster and seed peer cluster with `1:1` relationship.
+A Cluster can represent a P2P cluster, including a scheduler cluster and a seed peer cluster.
+
+![create-cluster](../../resource/advanced-guides/web-console/cluster/create-cluster.png)
+
+### Information
+
+The information of cluster.
+
+**Set cluster as your default cluster**: When peer does not find a matching cluster based on scopes,
+the default cluster will be used.
+
+### Scopes
+
+The cluster needs to serve the scope. It will provide scheduler services and seed peer services to peers in the scope.
+
+**Location**: The cluster needs to serve all peers in the location. When the location in the peer configuration matches
+the location in the cluster, the peer will preferentially use the scheduler and the seed peer of the cluster.
+It is separated by "|", for example "area|country|province|city".
+
+**IDC**: The cluster needs to serve all peers in the IDC. When the IDC in the peer configuration matches the IDC in the cluster,
+the peer will preferentially use the scheduler and the seed peer of the cluster.
+IDC has higher priority than location in the scopes.
+
+**CIDRs**: The cluster needs to serve all peers in the CIDRs. The advertise IP will be reported in the peer
+configuration when the peer is started, and if the advertise IP is empty in the peer configuration,
+peer will automatically get expose IP as advertise IP. When advertise IP of the peer matches the CIDRs in cluster,
+the peer will preferentially use the scheduler and the seed peer of the cluster.
+CIDRs has higher priority than IDC in the scopes.
+
+**Hostnames**: The cluster needs to serve all peers in Hostname. The input parameter is the multiple Hostname regexes.
+The Hostname will be reported in the peer configuration when the peer is started.
+When the Hostname matches the multiple Hostname regexes in the cluster,
+the peer will preferentially use the scheduler and the seed peer of the cluster.
+Hostname has higher priority than IDC in the scopes. Hostname has priority equal to CIDRs in the scopes.
+
+### Config
+
+The configuration for P2P downloads.
+
+**Seed Peer load limit**: If other peers download from the seed peer, the load of the seed peer will increase.
+When the load limit of the seed peer is reached, the scheduler will no longer schedule other peers to
+download from the seed peer until it has free load.
+
+**Peer load limit**: If other peers download from the peer, the load of the peer will increase.
+When the load limit of the peer is reached, the scheduler will no longer schedule other peers to
+download from the peer until it has free load.
+
+**Candidate parent limit**: The maximum number of parents that the scheduler can schedule for download peer.
+
+**Filter parent limit**: The scheduler will randomly select the number of parents from all the parents according to
+the filter parent limit and evaluate the optimal parents in selecting parents for the peer to download task.
+The number of optimal parent is the scheduling parent limit.
+
+**Job Rate Limit(requests per second)**: The rate limit(requests per second) for job Open API, default value is 10.
+
+## Update Cluster
+
+Update cluster information.
+
+![update-cluster](../../resource/advanced-guides/web-console/cluster/update-cluster.png)
+
+## Delete Cluster
+
+Delete cluster and at the same time delete scheduler cluster and seed peer cluster.
+
+![delete-cluster](../../resource/advanced-guides/web-console/cluster/delete-cluster.png)
+
+## Schedulers
+
+Display the schedulers.
+
+![schedulers](../../resource/advanced-guides/web-console/cluster/schedulers.png)
+
+## Scheduler
+
+Display the scheduler details.
+
+![scheduler](../../resource/advanced-guides/web-console/cluster/scheduler.png)
+
+## Delete Scheduler
+
+Delete scheduler record in database.
+
+![delete-scheduler](../../resource/advanced-guides/web-console/cluster/delete-scheduler.png)
+
+## Delete all inactive instances {#delete-all-inative-scheduler-instances}
+
+Delete inactive schedulers record in database.
+
+![delete-scheduler-inactive-instances](../../resource/advanced-guides/web-console/cluster/delete-inactive-scheduler.png)
+
+## Seed Peer
+
+Display the seed peer details.
+
+![seed-peer](../../resource/advanced-guides/web-console/cluster/seed-peer.png)
+
+## Delete seed peer
+
+Delete seed peer record in database.
+
+![delete-seed-peer](../../resource/advanced-guides/web-console/cluster/delete-seed-peer.png)
+
+## Delete all inactive instances {#delete-all-inative-instances}
+
+Delete inactive seed peers record in database.
+
+![delete-all-inactive-instances](../../resource/advanced-guides/web-console/cluster/delete-inactive-seed-peer.png)
+
+## Peers
+
+Display the peer information. You can click the `?` button to display prompt information.
+
+> Note: peer data is T+1.
+
+![peers](../../resource/advanced-guides/web-console/cluster/peers.png)
+
+## Refresh Peer Data {#refresh-peer-date}
+
+Force refresh of peer data.
+
+![refresh-peer](../../resource/advanced-guides/web-console/cluster/refresh-peer.png)
+
+## Export Peer Data {#export-peer-date}
+
+Export the peer data.
+ +![export-peer](../../resource/advanced-guides/web-console/cluster/export-peer.png) diff --git a/versioned_docs/version-v2.2.0/advanced-guides/web-console/developer/personal-access-tokens.md b/versioned_docs/version-v2.2.0/advanced-guides/web-console/developer/personal-access-tokens.md new file mode 100644 index 00000000..ce01d8b2 --- /dev/null +++ b/versioned_docs/version-v2.2.0/advanced-guides/web-console/developer/personal-access-tokens.md @@ -0,0 +1,51 @@ +--- +id: personal-access-tokens +title: Personal Access Tokens +slug: /advanced-guides/web-console/developer/personal-access-tokens/ +--- + +In this article, you will be shown Personal Access Tokens page information. + +## Personal Access Tokens + +Display all of the personal access tokens information. + +![tokens](../../../resource/advanced-guides/web-console/developer/personal-access-tokens/tokens.png) + +## Create Personal Access Token + +Create a personal access token. + +![create-token](../../../resource/advanced-guides/web-console/developer/personal-access-tokens/create-token.png) + +### Information + +The information of Personal Access Token. + +### Expiration + +Token expiration time. + +**Expiration**: Set your token an expiration. + +### Select scopes + +Select the access permissions for the token. + +**preheat**: Full control of preheating, it's used for preheating of harbor. + +**job**: Full control of job. If you need to call preheat job through open API, it is recommended to use preheat job. + +**cluster**: Full control of cluster. + +## Update Personal Access Token + +Update personal access token information. + +![update-token](../../../resource/advanced-guides/web-console/developer/personal-access-tokens/update-token.png) + +## Delete Personal Access Token + +Delete your personal access token. 
+ +![delete-token](../../../resource/advanced-guides/web-console/developer/personal-access-tokens/delete-token.png) diff --git a/versioned_docs/version-v2.2.0/advanced-guides/web-console/insight/peer.md b/versioned_docs/version-v2.2.0/advanced-guides/web-console/insight/peer.md new file mode 100644 index 00000000..3380fafe --- /dev/null +++ b/versioned_docs/version-v2.2.0/advanced-guides/web-console/insight/peer.md @@ -0,0 +1,21 @@ +--- +id: peer +title: Peer +slug: /advanced-guides/web-console/insight/peer/ +--- + +In this article, you will be shown Peer page information. + +## Peers + +Display the peer information, You can click the `?` button to display prompt information. + +> Note: peer data is T+1. + +![peers](../../../resource/advanced-guides/web-console/insight/peer/peers.png) + +## Export Peer Date + +Export the peer data. + +![export-peer](../../../resource/advanced-guides/web-console/insight/peer/export-peer.png) diff --git a/versioned_docs/version-v2.2.0/advanced-guides/web-console/job/preheat.md b/versioned_docs/version-v2.2.0/advanced-guides/web-console/job/preheat.md new file mode 100644 index 00000000..ae744c94 --- /dev/null +++ b/versioned_docs/version-v2.2.0/advanced-guides/web-console/job/preheat.md @@ -0,0 +1,66 @@ +--- +id: preheat +title: Preheat +slug: /advanced-guides/web-console/job/preheat/ +--- + +In this article, you will be shown Preheat page information. + +## Preheats + +Display all of the preheat tasks. + +![preheats](../../../resource/advanced-guides/preheat/preheats.png) + +## Preheat + +Display the preheat details, The `status` attribute shows whether the preheat is successful. + +![preheat](../../../resource/advanced-guides/preheat/success-preheat.png) + +## Preheat Failure + +If the status is `FAILURE`, the preheating is failure and an error log is displayed. + +![preheat-failure](../../../resource/advanced-guides/preheat/failure-preheat.png) + +## Create Preheat + +Create a preheat task for file preheating. 
+ +![create-preheat](../../../resource/advanced-guides/preheat/create-preheat.png) + +### Information + +The information of Preheat. + +**Description**: Set a description. + +### Clusters + +Preheat the cluster. + +**Clusters**: Used for clusters that need to be preheat. + +### Args + +Args used to pass additional configuration options to the preheat task. + +**Scope**: Select the scope of preheat as needed. + +- **Single Seed Peer**: Preheat to a seed peer. + +- **All Seed Peers**: Preheat to each seed peer in the P2P cluster. + +- **All Peers**: Preheat to each peer in the P2P cluster. + +**URL**: URL address used to specify the resource to be preheat. + +**Tag**: When the URL of the preheat task are the same but the Tag are different, they will be distinguished based on the +tag and the generated preheat task will be different. + +**Filtered Query Params**: By setting the filteredQueryParams parameter, you can specify +the file type of the resource that needs to be preheated. +The filteredQueryParams is used to generate a unique preheat task and filter unnecessary query parameters in the URL. + +**ADD Headers**: Add headers for preheat request. diff --git a/versioned_docs/version-v2.2.0/advanced-guides/web-console/job/task.md b/versioned_docs/version-v2.2.0/advanced-guides/web-console/job/task.md new file mode 100644 index 00000000..55a859ee --- /dev/null +++ b/versioned_docs/version-v2.2.0/advanced-guides/web-console/job/task.md @@ -0,0 +1,61 @@ +--- +id: task +title: Task +slug: /advanced-guides/web-console/job/task/ +--- + +In this article, you will be shown Task page information. + +## Search task + +### Search by URL + +**URL**: Query the task cache based on the URL. + +**Tag**: When the task URL is the same but the tags are different, +they will be distinguished based on the tags, and the queried tasks will also be different. + +**Application**: Caller application which is used for statistics and access control. 
+
+**Filter Query Params**: Filter the query parameters of the downloaded URL.
+If the download URL is the same, it will be scheduled as the same task.
+
+![search-task-by-url](../../../resource/advanced-guides/task/search-task-by-url.png)
+
+### Search by task id
+
+**Task ID**: Query the task cache based on the task id.
+
+![search-task-by-task-id](../../../resource/advanced-guides/task/search-task-by-task-id.png)
+
+## Delete task
+
+Click `DELETE` and delete task.
+
+![delete-task](../../../resource/advanced-guides/task/delete-task.png)
+
+The deleted task will not return results immediately and you need to wait.
+
+![pending-task](../../../resource/advanced-guides/task/pending-task.png)
+
+## Executions
+
+Displays all deleted tasks.
+
+![executions](../../../resource/advanced-guides/task/executions.png)
+
+### Delete successfully
+
+If the status is SUCCESS and the Failure list does not exist, it means that the deletion task is successful.
+
+![success-task](../../../resource/advanced-guides/task/success-task.png)
+
+### Delete failed
+
+The Failure list will show the tasks that failed to execute.
+
+![failure-task](../../../resource/advanced-guides/task/failure-task.png)
+
+Click the `Description` icon to view the failure log.
+
+![error-log](../../../resource/advanced-guides/task/error-log.png) diff --git a/versioned_docs/version-v2.2.0/advanced-guides/web-console/signin.md b/versioned_docs/version-v2.2.0/advanced-guides/web-console/signin.md new file mode 100644 index 00000000..339389e8 --- /dev/null +++ b/versioned_docs/version-v2.2.0/advanced-guides/web-console/signin.md @@ -0,0 +1,11 @@ +--- +id: signin +title: Sign in +slug: /advanced-guides/web-console/sign-in/ +--- +
+The default username and password are `root` and `dragonfly`.
+
+> Note: It is strongly recommended that you change the default administrator password.
+ +![signin](../../resource/advanced-guides/web-console/login//signin.png) diff --git a/versioned_docs/version-v2.2.0/advanced-guides/web-console/signup.md b/versioned_docs/version-v2.2.0/advanced-guides/web-console/signup.md new file mode 100644 index 00000000..2ba27980 --- /dev/null +++ b/versioned_docs/version-v2.2.0/advanced-guides/web-console/signup.md @@ -0,0 +1,17 @@ +--- +id: signup +title: Sign up +slug: /advanced-guides/web-console/sign-up/ +--- + +You can register a new account through the sign up page. + +![signup](../../resource/advanced-guides/web-console/login/signup.png) + +**Account**: Fill in the username, the length is 3-10. + +**Email**: Email address, the length is 3-10. + +**Password**: At least 8-16 characters, with at least 1 lowercase letter and 1 number. + +**ConfirmPassword**: Confirmation Password. diff --git a/versioned_docs/version-v2.2.0/advanced-guides/web-console/user.md b/versioned_docs/version-v2.2.0/advanced-guides/web-console/user.md new file mode 100644 index 00000000..490941cb --- /dev/null +++ b/versioned_docs/version-v2.2.0/advanced-guides/web-console/user.md @@ -0,0 +1,43 @@ +--- +id: user +title: User +slug: /advanced-guides/web-console/user/ +--- + +In this article, you will be shown User page information. + +## Users + +Only users with the `root` role can view the list of all users. + +![users](../../resource/advanced-guides/web-console/user/users.png) + +## User + +Display the user details. + +![user](../../resource/advanced-guides/web-console/user/user.png) + +## Update User Role + +Only users with the `root` role can change the roles of other users. + +![update-user-role](../../resource/advanced-guides/web-console/user/update-user-role.png) + +## Profile + +Display user's own profile. + +![profile](../../resource/advanced-guides/web-console/user/profile.png) + +## Change Password + +You can change your password. 
+ +![change-password](../../resource/advanced-guides/web-console/user/change-password.png) + +## Update Profile + +Update user's own profile. + +![update-profile](../../resource/advanced-guides/web-console/user/update-profile.png) diff --git a/versioned_docs/version-v2.2.0/development-guide/configure-development-nvironment.md b/versioned_docs/version-v2.2.0/development-guide/configure-development-nvironment.md new file mode 100644 index 00000000..d84ed4b1 --- /dev/null +++ b/versioned_docs/version-v2.2.0/development-guide/configure-development-nvironment.md @@ -0,0 +1,210 @@ +--- +id: configure-development-environment +title: Configure the Development Environment +slug: /development-guide/configure-development-environment/ +--- + +This document describes how to configure a local development environment for Dragonfly. + +## Prerequisites {#prerequisites} + + + +| Name | Version | Document | +| -------- | ---------------------------- | ---------------------------------------------------------------------------- | +| Git | 1.9.1+ | [git-scm](https://git-scm.com/) | +| Golang | 1.16.x | [go.dev](https://go.dev/) | +| Rust | 1.6+ | [rustup.rs](https://rustup.rs/) | +| Database | Mysql 5.6+ OR PostgreSQL 12+ | [mysql](https://www.mysql.com/) OR [postgresql](https://www.postgresql.org/) | +| Redis | 3.0+ | [redis.io](https://redis.io/) | + + + +## Clone Dragonfly {#clone-dragonfly} + +Clone the source code of Dragonfly: + +```bash +git clone --recurse-submodules https://github.com/dragonflyoss/dragonfly.git +cd dragonfly +``` + +Clone the source code of Client: + +```bash +git clone https://github.com/dragonflyoss/client.git +cd client +``` + +## Operation {#operation} + +### Manager {#manager} + +#### Setup Manager {#setup-manager} + +Configure `manager.yaml`, the default path is `/etc/dragonfly/manager.yaml`, +refer to [manager config](../reference/configuration/manager.md). 
+ +Set the `database.mysql.addrs` and `database.redis.addrs` address in the configuration file to your actual address. +Configuration content is as follows: + +```yaml +# Manager configuration. +database: + type: mysql + mysql: + user: dragonfly-mysql + password: your_mysql_password + host: your_mysql_host + port: your_mysql_port + dbname: manager + migrate: true + redis: + addrs: + - dragonfly-redis + masterName: your_redis_master_name + username: your_redis_username + password: your_redis_passwprd + db: 0 + brokerDB: 1 + backendDB: 2 +``` + +Run Manager: + +> Notice : Run Manager under dragonfly + +```bash +# Setup Manager. +go run cmd/manager/main.go --config /etc/dragonfly/manager.yaml --verbose --console +``` + +#### Verify {#verify-manager} + +After the Manager deployment is complete, run the following commands to verify if **Manager** is started, +and if Port `8080` and `65003` is available. + +```bash +telnet 127.0.0.1 8080 +telnet 127.0.0.1 65003 +``` + +### Scheduler {#scheduler} + +#### Setup Scheduler {#setup-scheduler} + +Configure `scheduler.yaml`, the default path is `/etc/dragonfly/scheduler.yaml`, +refer to [scheduler config](../reference/configuration/scheduler.md). + +Set the `database.redis.addrs` and `manager.addr` address in the configuration file to your actual address. +Configuration content is as follows: + +```yaml +# Scheduler configuration. +database: + redis: + addrs: + - dragonfly-redis + masterName: your_redis_master_name + username: your_redis_username + password: your_redis_password + brokerDB: 1 + backendDB: 2 + manager: + addr: 127.0.0.1:65003 + schedulerClusterID: 1 + keepAlive: + interval: 5s +``` + +Run Scheduler: + +> Notice : Run Scheduler under dragonfly + +```bash +# Setup Scheduler. 
+go run cmd/scheduler/main.go --config /etc/dragonfly/scheduler.yaml --verbose --console +``` + +#### Verify {#verify-scheduler} + +After the Scheduler deployment is complete, run the following commands to verify if **Scheduler** is started, +and if Port `8002` is available. + +```bash +telnet 127.0.0.1 8002 +``` + +### Dfdaemon {#dfdaemon} + +#### Setup Dfdaemon as Seed Peer {#setup-dfdaemon-as-seed-peer} + +Configure `dfdaemon.yaml`, the default path is `/etc/dragonfly/dfdaemon.yaml`, +refer to [dfdaemon config](../reference/configuration/client/dfdaemon.md). + +Set the `manager.addrs` address in the configuration file to your actual address. +Configuration content is as follows: + +```yaml +# Seed Peer configuration. +manager: + addr: http://127.0.0.1:65003 +seedPeer: + enable: true + type: super + clusterID: 1 +``` + +Run Dfdaemon as Seed Peer: + +> Notice : Run Dfdaemon under Client + +```bash +# Setup Dfdaemon. +cargo run --bin dfdaemon -- --config /etc/dragonfly/dfdaemon.yaml -l info --verbose +``` + +#### Verify {#verify-seed-peer} + +After the Seed Peer deployment is complete, run the following commands to verify if **Seed Peer** is started, +and if Port `4000`, `4001` and `4002` is available. + +```bash +telnet 127.0.0.1 4000 +telnet 127.0.0.1 4001 +telnet 127.0.0.1 4002 +``` + +#### Setup Dfdaemon as Peer {#setup-dfdaemon-as-Peer} + +Configure `dfdaemon.yaml`, the default path is `/etc/dragonfly/dfdaemon.yaml`, +refer to [dfdaemon config](../reference/configuration/client/dfdaemon.md). + +Set the `manager.addrs` address in the configuration file to your actual address. +Configuration content is as follows: + +```yaml +# Peer configuration. +manager: + addr: http://127.0.0.1:65003 +``` + +Run Dfdaemon as Peer: + +> Notice : Run Dfdaemon under Client + +```bash +# Setup Dfdaemon. 
+cargo run --bin dfdaemon -- --config /etc/dragonfly/dfdaemon.yaml -l info --verbose +``` + +#### Verify {#verify-peer} + +After the Peer deployment is complete, run the following commands to verify if **Peer** is started, +and if Port `4000`, `4001` and `4002` is available. + +```bash +telnet 127.0.0.1 4000 +telnet 127.0.0.1 4001 +telnet 127.0.0.1 4002 +``` diff --git a/versioned_docs/version-v2.2.0/development-guide/plugins/in-tree-plugin.md b/versioned_docs/version-v2.2.0/development-guide/plugins/in-tree-plugin.md new file mode 100644 index 00000000..21adf265 --- /dev/null +++ b/versioned_docs/version-v2.2.0/development-guide/plugins/in-tree-plugin.md @@ -0,0 +1,7 @@ +--- +id: in-tree-plugin +title: In-tree Plugin +slug: /development-guide/plugins/in-tree-plugin/ +--- + +## TODO diff --git a/versioned_docs/version-v2.2.0/development-guide/plugins/out-of-tree-plugin.md b/versioned_docs/version-v2.2.0/development-guide/plugins/out-of-tree-plugin.md new file mode 100644 index 00000000..32635994 --- /dev/null +++ b/versioned_docs/version-v2.2.0/development-guide/plugins/out-of-tree-plugin.md @@ -0,0 +1,7 @@ +--- +id: out-of-tree-plugins +title: Out-of-tree Plugin +slug: /development-guide/plugins/out-of-tree-plugins/ +--- + +## TODO diff --git a/versioned_docs/version-v2.2.0/development-guide/running-tests.md b/versioned_docs/version-v2.2.0/development-guide/running-tests.md new file mode 100644 index 00000000..6a10ad0b --- /dev/null +++ b/versioned_docs/version-v2.2.0/development-guide/running-tests.md @@ -0,0 +1,56 @@ +--- +id: running-tests +title: Running Tests +slug: /development-guide/running-tests/ +--- + +This document describes how to run unit tests and E2E tests. 
+ +## Prerequisites {#prerequisites} + + + +| Name | Version | Document | +| ------ | ------- | ------------------------------- | +| Golang | 1.16.x | [go.dev](https://go.dev/) | +| Rust | 1.6+ | [rustup.rs](https://rustup.rs/) | + + + +## Unit tests {#unit-tests} + +Unit tests is in the project directory. + +### Running unit tests {#running-unit-tests} + +```bash +make test +``` + +### Running uint tests with coverage reports {#running-uint-tests-with-coverage-reports} + +```bash +make test-coverage +``` + +## E2E tests {#e2e-tests} + +E2E tests is in `dragonfly/test/e2e` path. + +### Running E2E tests {#running-e2e-tests} + +```bash +make e2e-test +``` + +### Running E2E tests with coverage reports {#running-e2e-tests-with-coverage-reports} + +```bash +make e2e-test-coverage +``` + +### Clean E2E tests environment {#clean-e2e-tests-environment} + +```bash +make clean-e2e-test +``` diff --git a/versioned_docs/version-v2.2.0/getting-started/installation/binaries.md b/versioned_docs/version-v2.2.0/getting-started/installation/binaries.md new file mode 100644 index 00000000..cd198266 --- /dev/null +++ b/versioned_docs/version-v2.2.0/getting-started/installation/binaries.md @@ -0,0 +1,472 @@ +--- +id: binaries +title: Binaries +slug: /getting-started/installation/binaries/ +--- + +This guide shows how to install the Dragonfly. Dragonfly can be installed either from source, or from pre-built binary releases. 
+ +## Prerequisites {#prerequisites} + + + +| Name | Version | Document | +| -------- | ---------------------------- | ---------------------------------------------------------------------------- | +| Git | 1.9.1+ | [git-scm](https://git-scm.com/) | +| Golang | 1.16.x | [go.dev](https://go.dev/) | +| Rust | 1.6+ | [rustup.rs](https://rustup.rs/) | +| Database | Mysql 5.6+ OR PostgreSQL 12+ | [mysql](https://www.mysql.com/) OR [postgresql](https://www.postgresql.org/) | +| Redis | 3.0+ | [redis.io](https://redis.io/) | + + + +## Install Dragonfly {#Install-dragonfly} + +### From the Binary Releases {#from-the-binary-releases} + +Pre-built binaries are available on our Dragonfly [releases page](https://github.com/dragonflyoss/dragonfly/releases). +These binary versions can be manually downloaded and installed. + +Download the Dragonfly binaries: + +> Notice: your_version is recommended to use the latest version. + + + +```bash +VERSION= +wget -O dragonfly_linux_amd64.tar.gz https://github.com/dragonflyoss/dragonfly/releases/download/v${VERSION}/dragonfly-${VERSION}-linux-amd64.tar.gz +``` + + + +Untar the package: + +```bash +# Replace `/path/to/dragonfly` with the installation directory. +tar -zxf dragonfly_linux_amd64.tar.gz -C /path/to/dragonfly +``` + +Pre-built binaries are available on our Client [releases page](https://github.com/dragonflyoss/client/releases). +These binary versions can be manually downloaded and installed. + +Download the Client binaries: + +> Notice: your_client_version is recommended to use the latest version. + +```bash +CLIENT_VERSION= +wget -O client_x86_64-unknown-linux-musl.tar.gz https://github.com/dragonflyoss/client/releases/download/v${CLIENT_VERSION}/client-v${CLIENT_VERSION}-x86_64-unknown-linux-musl.tar.gz +``` + +Untar the package: + +```bash +# Replace `/path/to/dragonfly` with the installation directory. 
+tar -zxf client_x86_64-unknown-linux-musl.tar.gz --strip-components=1 -C /path/to/dragonfly +``` + +Configuration environment: + +```bash +export PATH="/path/to/dragonfly:$PATH" +``` + +### From Source {#from-source} + +Clone the source code of Dragonfly: + +```bash +git clone --recurse-submodules https://github.com/dragonflyoss/dragonfly.git +cd dragonfly +``` + +Compile the source code: + +```bash +# At the same time to build scheduler and manager. +make build-manager && make build-scheduler + +# Install executable file to /opt/dragonfly/bin/{manager,scheduler}. +make install-manager +make install-scheduler +``` + +Clone the source code of Client: + +```bash +git clone https://github.com/dragonflyoss/client.git +cd client +``` + +Compile the source code: + +```bash +# At the same time to build dfdaemon and dfget. +cargo build --release --bins + +# Install executable file to /opt/dragonfly/bin/{dfget,dfdaemon}. +mv target/release/dfget /opt/dragonfly/bin/dfget +mv target/release/dfdaemon /opt/dragonfly/bin/dfdaemon +``` + +Configuration environment: + +```bash +export PATH="/opt/dragonfly/bin/:$PATH" +``` + +### Install Client using RPM {#install-client-using-rpm} + +#### Step 1: Install Client {#install-client-rpm} + +Download and execute the install script. + +> Notice: version is recommended to use the latest version. 
+ +```bash +wget -O dragonfly-client-{arch}-unknown-linux-musl.rpm https://github.com/dragonflyoss/client/releases/download/v{version}/dragonfly-client-v{version}-{arch}-unknown-linux-musl.rpm + +rpm -ivh dragonfly-client-{arch}-unknown-linux-musl.rpm +``` + +Make sure to replace `arch` with one of the following: + +- `x86_64` +- `aarch64` + +#### Step 2: Create Dfdaemon Configuration {#create-dfdaemon-configuration-rpm} + +Create the dfdaemon configuration file `/etc/dragonfly/dfdaemon.yaml` +and modify the `manager.addrs` in the configuration file to your actual address, +refer to [dfdaemon config](../../reference/configuration/client/dfdaemon.md). + +##### Option 1: Setup Dfdaemon as Seed Peer {#setup-dfdaemon-as-seed-peer-rpm} + +```shell +# Seed Peer configuration. +manager: + addr: http://dragonfly-manager:65003 +seedPeer: + enable: true + type: super + clusterID: 1 +``` + +##### Option 2: Setup Dfdaemon as Peer {#setup-dfdaemon-as-peer-rpm} + +```shell +# Peer configuration. +manager: + addr: http://dragonfly-manager:65003 +``` + +#### Step 3: Run Dfdaemon with Systemd {#run-dfdaemon-with-systemd-rpm} + +Systemd starts dfdaemon service. + +> Notice:To start dfdaemon, you need to start the manager and scheduler first. 
+ +```shell +$ sudo systemctl enable dfdaemon +$ sudo systemctl start dfdaemon +$ sudo systemctl status dfdaemon +● dfdaemon.service - dfdaemon is a high performance P2P download daemon + Loaded: loaded (/lib/systemd/system/dfdaemon.service; enabled; preset: enabled) + Active: active (running) since Mon 2024-08-05 17:46:39 UTC; 4s ago + Docs: https://github.com/dragonflyoss/client + Main PID: 2118 (dfdaemon) + Tasks: 13 (limit: 11017) + Memory: 15.0M (max: 8.0G available: 7.9G) + CPU: 83ms + CGroup: /system.slice/dfdaemon.service + └─2118 /usr/bin/dfdaemon --config /etc/dragonfly/dfdaemon.yaml --verbose +``` + +#### Step 4: Use Dfget to download files {#dfget-to-download-files-rpm} + +Use Dfget to download files, refer to [Dfget](../../reference/commands/client/dfget.md). + +```shell +# View Dfget cli help docs. +dfget --help + +# Download with HTTP protocol +dfget -O /path/to/output http://example.com/object +``` + +### Install Client using DEB {#install-client-using-deb} + +#### Step 1: Install Client {#install-client-deb} + +Download and execute the install script. + +> Notice: version is recommended to use the latest version. + +```bash +wget -O dragonfly-client-{arch}-unknown-linux-musl.deb +https://github.com/dragonflyoss/client/releases/download/v{version}/dragonfly-client-v{version}-{arch}-unknown-linux-musl.deb + + +dpkg -i dragonfly-client-{arch}-unknown-linux-musl.deb +``` + +Make sure to replace `arch` with one of the following: + +- `x86_64` +- `aarch64` + +#### Step 2: Create Dfdaemon Configuration {#create-dfdaemon-configuration-deb} + +Create the dfdaemon configuration file `/etc/dragonfly/dfdaemon.yaml` and modify the `manager.addrs` +in the configuration file to your actual address, +refer to [dfdaemon config](../../reference/configuration/client/dfdaemon.md). + +##### Option 1: Setup Dfdaemon as Seed Peer {#setup-dfdaemon-as-seed-peer-deb} + +```shell +# Seed Peer configuration. 
+manager: + addr: http://dragonfly-manager:65003 +seedPeer: + enable: true + type: super + clusterID: 1 +``` + +##### Option 2: Setup Dfdaemon as Peer {#setup-dfdaemon-as-peer-deb} + +```shell +# Peer configuration. +manager: + addr: http://dragonfly-manager:65003 +``` + +#### Step 3: Run Dfdaemon with Systemd {#run-dfdaemon-with-systemd-deb} + +Systemd starts dfdaemon service. + +> Notice:To start dfdaemon, you need to start the manager and scheduler first. + +```shell +$ sudo systemctl enable dfdaemon +$ sudo systemctl start dfdaemon +$ sudo systemctl status dfdaemon +● dfdaemon.service - dfdaemon is a high performance P2P download daemon + Loaded: loaded (/lib/systemd/system/dfdaemon.service; enabled; preset: enabled) + Active: active (running) since Mon 2024-08-05 17:46:39 UTC; 4s ago + Docs: https://github.com/dragonflyoss/client + Main PID: 2118 (dfdaemon) + Tasks: 13 (limit: 11017) + Memory: 15.0M (max: 8.0G available: 7.9G) + CPU: 83ms + CGroup: /system.slice/dfdaemon.service + └─2118 /usr/bin/dfdaemon --config /etc/dragonfly/dfdaemon.yaml --verbose +``` + +#### Step 4: Use Dfget to download files {#dfget-to-download-files-deb} + +Use Dfget to download files, refer to [Dfget](../../reference/commands/client/dfget.md). + +```shell +# View Dfget cli help docs. +dfget --help + +# Download with HTTP protocol +dfget -O /path/to/output http://example.com/object +``` + +## Operation {#operation} + +### Manager {#manager} + +#### Setup Manager {#setup-manager} + +Configure `manager.yaml`, the default path is `/etc/dragonfly/manager.yaml`, +refer to [manager config](../../reference/configuration/manager.md). + +Set the `database.mysql.addrs` and `database.redis.addrs` address in the configuration file to your actual address. +Configuration content is as follows: + +```yaml +# Manager configuration. 
+database:
+  type: mysql
+  mysql:
+    user: dragonfly-mysql
+    password: your_mysql_password
+    host: your_mysql_host
+    port: your_mysql_port
+    dbname: manager
+    migrate: true
+  redis:
+    addrs:
+      - dragonfly-redis
+    masterName: your_redis_master_name
+    username: your_redis_username
+    password: your_redis_password
+    db: 0
+    brokerDB: 1
+    backendDB: 2
+```
+
+Run Manager:
+
+```bash
+# View Manager cli help docs.
+manager --help
+
+# Setup Manager, it is recommended to start Manager via systemd.
+manager
+```
+
+#### Verify {#verify-manager}
+
+After the Manager deployment is complete, run the following commands to verify if **Manager** is started,
+and if Port `8080` and `65003` is available.
+
+```bash
+telnet 127.0.0.1 8080
+telnet 127.0.0.1 65003
+```
+
+#### Manager Console {#manager-console}
+
+Now you can open the browser and visit the console at `localhost:8080`. For a preview of console features, refer to the document [console preview](../../advanced-guides/web-console.md).
+
+![manager-console](../../resource/getting-started/installation/manager-console.png)
+
+### Scheduler {#scheduler}
+
+#### Setup Scheduler {#setup-scheduler}
+
+Configure `scheduler.yaml`, the default path is `/etc/dragonfly/scheduler.yaml`,
+refer to [scheduler config](../../reference/configuration/scheduler.md).
+
+Set the `database.redis.addrs` and `manager.addr` address in the configuration file to your actual address.
+Configuration content is as follows:
+
+```yaml
+# Scheduler configuration.
+database:
+  redis:
+    addrs:
+      - dragonfly-redis
+    masterName: your_redis_master_name
+    username: your_redis_username
+    password: your_redis_password
+    brokerDB: 1
+    backendDB: 2
+manager:
+  addr: dragonfly-manager:65003
+  schedulerClusterID: 1
+  keepAlive:
+    interval: 5s
+```
+
+Run Scheduler:
+
+```bash
+# View Scheduler cli help docs.
+scheduler --help
+
+# Setup Scheduler, it is recommended to start Scheduler via systemd.
+scheduler +``` + +#### Verify {#verify-scheduler} + +After the Scheduler deployment is complete, run the following commands to verify if **Scheduler** is started, +and if Port `8002` is available. + +```bash +telnet 127.0.0.1 8002 +``` + +### Dfdaemon {#dfdaemon} + +#### Setup Dfdaemon as Seed Peer {#setup-dfdaemon-as-seed-peer} + +Configure `dfdaemon.yaml`, the default path is `/etc/dragonfly/dfdaemon.yaml`, +refer to [dfdaemon config](../../reference/configuration/client/dfdaemon.md). + +Set the `manager.addrs` address in the configuration file to your actual address. +Configuration content is as follows: + +```yaml +# Seed Peer configuration. +manager: + addr: http://dragonfly-manager:65003 +seedPeer: + enable: true + type: super + clusterID: 1 +``` + +Run Dfdaemon as Seed Peer: + +```bash +# View Dfdaemon cli help docs. +dfdaemon --help + +# Setup Dfdaemon, it is recommended to start Dfdaemon via systemd. +dfdaemon +``` + +#### Verify {#verify-seed-peer} + +After the Seed Peer deployment is complete, run the following commands to verify if **Seed Peer** is started, +and if Port `4000`, `4001` and `4002` is available. + +```bash +telnet 127.0.0.1 4000 +telnet 127.0.0.1 4001 +telnet 127.0.0.1 4002 +``` + +#### Setup Dfdaemon as Peer {#setup-dfdaemon-as-Peer} + +Configure `dfdaemon.yaml`, the default path is `/etc/dragonfly/dfdaemon.yaml`, +refer to [dfdaemon config](../../reference/configuration/client/dfdaemon.md). + +Set the `manager.addrs` address in the configuration file to your actual address. +Configuration content is as follows: + +```yaml +# Peer configuration. +manager: + addr: http://dragonfly-manager:65003 +``` + +Run Dfdaemon as Peer: + +```bash +# View Dfdaemon cli help docs. +dfdaemon --help + +# Setup Dfdaemon, it is recommended to start Dfdaemon via systemd. 
+dfdaemon +``` + +#### Verify {#verify-peer} + +After the Peer deployment is complete, run the following commands to verify if **Peer** is started, +and if Port `4000`, `4001` and `4002` is available. + +```bash +telnet 127.0.0.1 4000 +telnet 127.0.0.1 4001 +telnet 127.0.0.1 4002 +``` + +### Dfget + +Use Dfget to download files, refer to [Dfget](../../reference/commands/client/dfget.md). + +```shell +# View Dfget cli help docs. +dfget --help + +# Download with HTTP protocol +dfget -O /path/to/output http://example.com/object +``` diff --git a/versioned_docs/version-v2.2.0/getting-started/installation/helm-charts.md b/versioned_docs/version-v2.2.0/getting-started/installation/helm-charts.md new file mode 100644 index 00000000..fbdff88b --- /dev/null +++ b/versioned_docs/version-v2.2.0/getting-started/installation/helm-charts.md @@ -0,0 +1,383 @@ +--- +id: helm-charts +title: Helm Charts +slug: /getting-started/installation/helm-charts/ +--- + +Documentation for deploying Dragonfly on kubernetes using helm. + +For more integrations such as Docker, CRI-O, Singularity/Apptainer, Nydus, eStargz, Harbor, Git LFS, +Hugging Face, TorchServe, Triton Server, etc., refer to [Integrations](../../operations/integrations/container-runtime/containerd.md). + +## Prerequisites {#prerequisites} + +| Name | Version | Document | +| ------------------ | ------- | --------------------------------------- | +| Kubernetes cluster | 1.20+ | [kubernetes.io](https://kubernetes.io/) | +| Helm | v3.8.0+ | [helm.sh](https://helm.sh/) | +| containerd | v1.5.0+ | [containerd.io](https://containerd.io/) | + +## Setup kubernetes cluster {#setup-kubernetes-cluster} + +[Kind](https://kind.sigs.k8s.io/) is recommended if no Kubernetes cluster is available for testing. 
+ +Create kind multi-node cluster configuration file `kind-config.yaml`, configuration content is as follows: + +```yaml +kind: Cluster +apiVersion: kind.x-k8s.io/v1alpha4 +nodes: + - role: control-plane + - role: worker + - role: worker +``` + +Create a kind multi-node cluster using the configuration file: + +```shell +kind create cluster --config kind-config.yaml +``` + +Switch the context of kubectl to kind cluster: + +```shell +kubectl config use-context kind-kind +``` + +## Kind loads Dragonfly image {#kind-loads-dragonfly-image} + +Pull Dragonfly latest images: + +```shell +docker pull dragonflyoss/scheduler:latest +docker pull dragonflyoss/manager:latest +docker pull dragonflyoss/client:latest +docker pull dragonflyoss/dfinit:latest +``` + +Kind cluster loads Dragonfly latest images: + +```shell +kind load docker-image dragonflyoss/scheduler:latest +kind load docker-image dragonflyoss/manager:latest +kind load docker-image dragonflyoss/client:latest +kind load docker-image dragonflyoss/dfinit:latest +``` + +## Create Dragonfly cluster based on helm charts {#create-dragonfly-cluster-based-on-helm-charts} + +Create the Helm Charts configuration file `values.yaml`, and set the container runtime to `containerd`. +Please refer to the [configuration](https://artifacthub.io/packages/helm/dragonfly/dragonfly#values) documentation for details. 
+ +```yaml +manager: + image: + repository: dragonflyoss/manager + tag: latest + metrics: + enable: true + config: + verbose: true + pprofPort: 18066 + +scheduler: + image: + repository: dragonflyoss/scheduler + tag: latest + metrics: + enable: true + config: + verbose: true + pprofPort: 18066 + +seedClient: + image: + repository: dragonflyoss/client + tag: latest + metrics: + enable: true + config: + verbose: true + +client: + image: + repository: dragonflyoss/client + tag: latest + metrics: + enable: true + config: + verbose: true + dfinit: + enable: true + image: + repository: dragonflyoss/dfinit + tag: latest + config: + containerRuntime: + containerd: + configPath: /etc/containerd/config.toml + registries: + - hostNamespace: docker.io + serverAddr: https://index.docker.io + capabilities: ['pull', 'resolve'] +``` + +Create a Dragonfly cluster using the configuration file: + + + +```shell +$ helm repo add dragonfly https://dragonflyoss.github.io/helm-charts/ +$ helm install --wait --create-namespace --namespace dragonfly-system dragonfly dragonfly/dragonfly -f values.yaml +NAME: dragonfly +LAST DEPLOYED: Thu Apr 18 19:26:39 2024 +NAMESPACE: dragonfly-system +STATUS: deployed +REVISION: 1 +TEST SUITE: None +NOTES: +1. Get the scheduler address by running these commands: + export SCHEDULER_POD_NAME=$(kubectl get pods --namespace dragonfly-system -l "app=dragonfly,release=dragonfly,component=scheduler" -o jsonpath={.items[0].metadata.name}) + export SCHEDULER_CONTAINER_PORT=$(kubectl get pod --namespace dragonfly-system $SCHEDULER_POD_NAME -o jsonpath="{.spec.containers[0].ports[0].containerPort}") + kubectl --namespace dragonfly-system port-forward $SCHEDULER_POD_NAME 8002:$SCHEDULER_CONTAINER_PORT + echo "Visit http://127.0.0.1:8002 to use your scheduler" + +2. 
Get the dfdaemon port by running these commands: + export DFDAEMON_POD_NAME=$(kubectl get pods --namespace dragonfly-system -l "app=dragonfly,release=dragonfly,component=dfdaemon" -o jsonpath={.items[0].metadata.name}) + export DFDAEMON_CONTAINER_PORT=$(kubectl get pod --namespace dragonfly-system $DFDAEMON_POD_NAME -o jsonpath="{.spec.containers[0].ports[0].containerPort}") + You can use $DFDAEMON_CONTAINER_PORT as a proxy port in Node. + +3. Configure runtime to use dragonfly: + https://d7y.io/docs/getting-started/quick-start/kubernetes/ +``` + + + +Check that Dragonfly is deployed successfully: + +```shell +$ kubectl get po -n dragonfly-system +NAME READY STATUS RESTARTS AGE +dragonfly-client-gvspg 1/1 Running 0 34m +dragonfly-client-kxrhh 1/1 Running 0 34m +dragonfly-manager-864774f54d-6t79l 1/1 Running 0 34m +dragonfly-mysql-0 1/1 Running 0 34m +dragonfly-redis-master-0 1/1 Running 0 34m +dragonfly-redis-replicas-0 1/1 Running 0 34m +dragonfly-redis-replicas-1 1/1 Running 0 32m +dragonfly-redis-replicas-2 1/1 Running 0 32m +dragonfly-scheduler-0 1/1 Running 0 34m +dragonfly-seed-client-0 1/1 Running 5 (21m ago) 34m +``` + +## Containerd downloads images through Dragonfly {#containerd-downloads-images-through-dragonfly} + +Pull `alpine:3.19` image in kind-worker node: + +```shell +docker exec -i kind-worker /usr/local/bin/crictl pull alpine:3.19 +``` + +### Verify {#verify} + +You can execute the following command to check if the `alpine:3.19` image is distributed via Dragonfly. + + + +```shell +# Find pod name. +export POD_NAME=$(kubectl get pods --namespace dragonfly-system -l "app=dragonfly,release=dragonfly,component=client" -o=jsonpath='{.items[?(@.spec.nodeName=="kind-worker")].metadata.name}' | head -n 1 ) + +# Find task id. +export TASK_ID=$(kubectl -n dragonfly-system exec ${POD_NAME} -- sh -c "grep -hoP 'library/alpine.*task_id=\"\K[^\"]+' /var/log/dragonfly/dfdaemon/* | head -n 1") + +# Check logs. 
+kubectl -n dragonfly-system exec -it ${POD_NAME} -- sh -c "grep ${TASK_ID} /var/log/dragonfly/dfdaemon/* | grep 'download task succeeded'" + +# Download logs. +kubectl -n dragonfly-system exec ${POD_NAME} -- sh -c "grep ${TASK_ID} /var/log/dragonfly/dfdaemon/*" > dfdaemon.log +``` + + + +The expected output is as follows: + +```shell +{ + 2024-04-19T02:44:09.259458Z INFO + "download_task":"dragonfly-client/src/grpc/dfdaemon_download.rs:276":: "download task succeeded" + "host_id": "172.18.0.3-kind-worker", + "task_id": "a46de92fcb9430049cf9e61e267e1c3c9db1f1aa4a8680a048949b06adb625a5", + "peer_id": "172.18.0.3-kind-worker-86e48d67-1653-4571-bf01-7e0c9a0a119d" +} + +``` + +## Performance testing {#performance-testing} + +### Containerd pull image back-to-source for the first time through Dragonfly {#containerd-pull-image-back-to-source-for-the-first-time-through-dragonfly} + +Pull `alpine:3.19` image in `kind-worker` node: + +```shell +time docker exec -i kind-worker /usr/local/bin/crictl pull alpine:3.19 +``` + +When pull image back-to-source for the first time through Dragonfly, it takes `28.82s` to download the `alpine:3.19` image. + +### Containerd pull image hits the cache of remote peer {#containerd-pull-image-hits-the-cache-of-remote-peer} + +Delete the client whose Node is `kind-worker` to clear the cache of Dragonfly local Peer. + + + +```shell +# Find pod name. +export POD_NAME=$(kubectl get pods --namespace dragonfly-system -l "app=dragonfly,release=dragonfly,component=client" -o=jsonpath='{.items[?(@.spec.nodeName=="kind-worker")].metadata.name}' | head -n 1 ) + +# Delete pod. 
+kubectl delete pod ${POD_NAME} -n dragonfly-system +``` + + + +Delete `alpine:3.19` image in `kind-worker` node: + +```shell +docker exec -i kind-worker /usr/local/bin/crictl rmi alpine:3.19 +``` + +Pull `alpine:3.19` image in `kind-worker` node: + +```shell +time docker exec -i kind-worker /usr/local/bin/crictl pull alpine:3.19 +``` + +When pull image hits cache of remote peer, it takes `12.524s` to download the +`alpine:3.19` image. + +### Containerd pull image hits the cache of local peer {#containerd-pull-image-hits-the-cache-of-local-peer} + +Delete `alpine:3.19` image in `kind-worker` node: + +```shell +docker exec -i kind-worker /usr/local/bin/crictl rmi alpine:3.19 +``` + +Pull `alpine:3.19` image in `kind-worker` node: + +```shell +time docker exec -i kind-worker /usr/local/bin/crictl pull alpine:3.19 +``` + +When pull image hits cache of local peer, it takes `7.432s` to download the +`alpine:3.19` image. + +## Preheat image {#preheat-image} + +Expose manager's port `8080`: + +```shell +kubectl --namespace dragonfly-system port-forward service/dragonfly-manager 8080:8080 +``` + +Please create personal access Token before calling Open API, and select `job` for access scopes, refer to [personal-access-tokens](../../advanced-guides/personal-access-tokens.md). + +Use Open API to preheat the image `alpine:3.19` to Seed Peer, refer to [preheat](../../advanced-guides/preheat.md). 
+ +```shell +curl --location --request POST 'http://127.0.0.1:8080/oapi/v1/jobs' \ +--header 'Content-Type: application/json' \ +--header 'Authorization: Bearer your_personal_access_token' \ +--data-raw '{ + "type": "preheat", + "args": { + "type": "image", + "url": "https://index.docker.io/v2/library/alpine/manifests/3.19", + "filteredQueryParams": "Expires&Signature", + "username": "your_registry_username", + "password": "your_registry_password" + } +}' +``` + +The command-line log returns the preheat job id: + +```json +{ + "id": 1, + "created_at": "2024-04-18T08:51:55Z", + "updated_at": "2024-04-18T08:51:55Z", + "task_id": "group_2717f455-ff0a-435f-a3a7-672828d15a2a", + "type": "preheat", + "state": "PENDING", + "args": { + "filteredQueryParams": "Expires&Signature", + "headers": null, + "password": "", + "pieceLength": 4194304, + "platform": "", + "tag": "", + "type": "image", + "url": "https://index.docker.io/v2/library/alpine/manifests/3.19", + "username": "" + }, + "scheduler_clusters": [ + { + "id": 1, + "created_at": "2024-04-18T08:29:15Z", + "updated_at": "2024-04-18T08:29:15Z", + "name": "cluster-1" + } + ] +} +``` + +Polling the preheating status with job id: + +```shell +curl --request GET 'http://127.0.0.1:8080/oapi/v1/jobs/1' \ +--header 'Content-Type: application/json' \ +--header 'Authorization: Bearer your_personal_access_token' +``` + +If the status is `SUCCESS`, the preheating is successful: + +```json +{ + "id": 1, + "created_at": "2024-04-18T08:51:55Z", + "updated_at": "2024-04-18T08:51:55Z", + "task_id": "group_2717f455-ff0a-435f-a3a7-672828d15a2a", + "type": "preheat", + "state": "SUCCESS", + "args": { + "filteredQueryParams": "Expires&Signature", + "headers": null, + "password": "", + "pieceLength": 4194304, + "platform": "", + "tag": "", + "type": "image", + "url": "https://index.docker.io/v2/library/alpine/manifests/3.19", + "username": "" + }, + "scheduler_clusters": [ + { + "id": 1, + "created_at": "2024-04-18T08:29:15Z", + 
"updated_at": "2024-04-18T08:29:15Z", + "name": "cluster-1" + } + ] +} +``` + +Pull `alpine:3.19` image in `kind-worker` node: + +```shell +time docker exec -i kind-worker /usr/local/bin/crictl pull alpine:3.19 +``` + +When pull image hits preheat cache, it takes `11.030s` to download the +`alpine:3.19` image. diff --git a/versioned_docs/version-v2.2.0/getting-started/quick-start.md b/versioned_docs/version-v2.2.0/getting-started/quick-start.md new file mode 100644 index 00000000..ac484540 --- /dev/null +++ b/versioned_docs/version-v2.2.0/getting-started/quick-start.md @@ -0,0 +1,10 @@ +--- +id: quick-start +title: Quick Start +slug: /getting-started/quick-start +--- + +Table of contents: + +- [Kubernetes](./quick-start/kubernetes.md) +- [Multi-cluster Kubernetes](./quick-start/multi-cluster-kubernetes.md) diff --git a/versioned_docs/version-v2.2.0/getting-started/quick-start/kubernetes.md b/versioned_docs/version-v2.2.0/getting-started/quick-start/kubernetes.md new file mode 100644 index 00000000..3cdb67ae --- /dev/null +++ b/versioned_docs/version-v2.2.0/getting-started/quick-start/kubernetes.md @@ -0,0 +1,383 @@ +--- +id: kubernetes +title: Kubernetes +description: Kubernetes +slug: /getting-started/quick-start/kubernetes/ +--- + +Documentation for deploying Dragonfly on kubernetes using helm. + +## Runtime + +You can have a quick start following [Helm Charts](../installation/helm-charts.md). +It is recommended to use `containerd`. 
+ +| Runtime | Version | Document | +| ---------- | -------- | --------------------------------------------------------------------- | +| containerd | v1.1.0+ | [Link](../../operations/integrations/container-runtime/containerd.md) | +| Docker | v20.0.1+ | [Link](../../operations/integrations/container-runtime/docker.md) | +| CRI-O | All | [Link](../../operations/integrations/container-runtime/cri-o.md) | + +## Setup kubernetes cluster {#setup-kubernetes-cluster} + +[Kind](https://kind.sigs.k8s.io/) is recommended if no Kubernetes cluster is available for testing. + +Create kind multi-node cluster configuration file `kind-config.yaml`, configuration content is as follows: + +```yaml +kind: Cluster +apiVersion: kind.x-k8s.io/v1alpha4 +nodes: + - role: control-plane + - role: worker + - role: worker +``` + +Create a kind multi-node cluster using the configuration file: + +```shell +kind create cluster --config kind-config.yaml +``` + +Switch the context of kubectl to kind cluster: + +```shell +kubectl config use-context kind-kind +``` + +## Kind loads Dragonfly image {#kind-loads-dragonfly-image} + +Pull Dragonfly latest images: + +```shell +docker pull dragonflyoss/scheduler:latest +docker pull dragonflyoss/manager:latest +docker pull dragonflyoss/client:latest +docker pull dragonflyoss/dfinit:latest +``` + +Kind cluster loads Dragonfly latest images: + +```shell +kind load docker-image dragonflyoss/scheduler:latest +kind load docker-image dragonflyoss/manager:latest +kind load docker-image dragonflyoss/client:latest +kind load docker-image dragonflyoss/dfinit:latest +``` + +## Create Dragonfly cluster based on helm charts {#create-dragonfly-cluster-based-on-helm-charts} + +Create helm charts configuration file `charts-config.yaml`, configuration content is as follows: + +```yaml +manager: + image: + repository: dragonflyoss/manager + tag: latest + metrics: + enable: true + config: + verbose: true + pprofPort: 18066 + +scheduler: + image: + repository: 
dragonflyoss/scheduler + tag: latest + metrics: + enable: true + config: + verbose: true + pprofPort: 18066 + +seedClient: + image: + repository: dragonflyoss/client + tag: latest + metrics: + enable: true + config: + verbose: true + +client: + image: + repository: dragonflyoss/client + tag: latest + metrics: + enable: true + config: + verbose: true + dfinit: + enable: true + image: + repository: dragonflyoss/dfinit + tag: latest + config: + containerRuntime: + containerd: + configPath: /etc/containerd/config.toml + registries: + - hostNamespace: docker.io + serverAddr: https://index.docker.io + capabilities: ['pull', 'resolve'] +``` + +Create a Dragonfly cluster using the configuration file: + + + +```shell +$ helm repo add dragonfly https://dragonflyoss.github.io/helm-charts/ +$ helm install --wait --create-namespace --namespace dragonfly-system dragonfly dragonfly/dragonfly -f charts-config.yaml +NAME: dragonfly +LAST DEPLOYED: Tue Apr 16 11:23:00 2024 +NAMESPACE: dragonfly-system +STATUS: deployed +REVISION: 1 +TEST SUITE: None +NOTES: +1. Get the scheduler address by running these commands: + export SCHEDULER_POD_NAME=$(kubectl get pods --namespace dragonfly-system -l "app=dragonfly,release=dragonfly,component=scheduler" -o jsonpath={.items[0].metadata.name}) + export SCHEDULER_CONTAINER_PORT=$(kubectl get pod --namespace dragonfly-system $SCHEDULER_POD_NAME -o jsonpath="{.spec.containers[0].ports[0].containerPort}") + kubectl --namespace dragonfly-system port-forward $SCHEDULER_POD_NAME 8002:$SCHEDULER_CONTAINER_PORT + echo "Visit http://127.0.0.1:8002 to use your scheduler" + +2. 
Get the dfdaemon port by running these commands: + export DFDAEMON_POD_NAME=$(kubectl get pods --namespace dragonfly-system -l "app=dragonfly,release=dragonfly,component=dfdaemon" -o jsonpath={.items[0].metadata.name}) + export DFDAEMON_CONTAINER_PORT=$(kubectl get pod --namespace dragonfly-system $DFDAEMON_POD_NAME -o jsonpath="{.spec.containers[0].ports[0].containerPort}") + You can use $DFDAEMON_CONTAINER_PORT as a proxy port in Node. + +3. Configure runtime to use dragonfly: + https://d7y.io/docs/getting-started/quick-start/kubernetes/ +``` + + + +Check that Dragonfly is deployed successfully: + +```shell +$ kubectl get po -n dragonfly-system +NAME READY STATUS RESTARTS AGE +dragonfly-client-dhqfc 1/1 Running 0 13m +dragonfly-client-h58x6 1/1 Running 0 13m +dragonfly-manager-7b4fd85458-fjtpk 1/1 Running 0 13m +dragonfly-mysql-0 1/1 Running 0 13m +dragonfly-redis-master-0 1/1 Running 0 13m +dragonfly-redis-replicas-0 1/1 Running 0 13m +dragonfly-redis-replicas-1 1/1 Running 0 11m +dragonfly-redis-replicas-2 1/1 Running 0 10m +dragonfly-scheduler-0 1/1 Running 0 13m +dragonfly-seed-client-0 1/1 Running 2 (76s ago) 13m +``` + +## Containerd downloads images through Dragonfly {#containerd-downloads-images-through-dragonfly} + +Pull `alpine:3.19` image in kind-worker node: + +```shell +docker exec -i kind-worker /usr/local/bin/crictl pull alpine:3.19 +``` + +### Verify {#verify} + +You can execute the following command to check if the `alpine:3.19` image is distributed via Dragonfly. + + + +```shell +# Find pod name. +export POD_NAME=$(kubectl get pods --namespace dragonfly-system -l "app=dragonfly,release=dragonfly,component=client" -o=jsonpath='{.items[?(@.spec.nodeName=="kind-worker")].metadata.name}' | head -n 1 ) + +# Find task id. +export TASK_ID=$(kubectl -n dragonfly-system exec ${POD_NAME} -- sh -c "grep -hoP 'library/alpine.*task_id=\"\K[^\"]+' /var/log/dragonfly/dfdaemon/* | head -n 1") + +# Check logs. 
+kubectl -n dragonfly-system exec -it ${POD_NAME} -- sh -c "grep ${TASK_ID} /var/log/dragonfly/dfdaemon/* | grep 'download task succeeded'" + +# Download logs. +kubectl -n dragonfly-system exec ${POD_NAME} -- sh -c "grep ${TASK_ID} /var/log/dragonfly/dfdaemon/*" > dfdaemon.log +``` + + + +The expected output is as follows: + +```shell +{ + 2024-04-19T02:44:09.259458Z INFO + "download_task":"dragonfly-client/src/grpc/dfdaemon_download.rs:276":: "download task succeeded" + "host_id": "172.18.0.3-kind-worker", + "task_id": "a46de92fcb9430049cf9e61e267e1c3c9db1f1aa4a8680a048949b06adb625a5", + "peer_id": "172.18.0.3-kind-worker-86e48d67-1653-4571-bf01-7e0c9a0a119d" +} +``` + +## Performance testing {#performance-testing} + +### Containerd pull image back-to-source for the first time through Dragonfly {#containerd-pull-image-back-to-source-for-the-first-time-through-dragonfly} + +Pull `alpine:3.19` image in `kind-worker` node: + +```shell +time docker exec -i kind-worker /usr/local/bin/crictl pull alpine:3.19 +``` + +When pull image back-to-source for the first time through Dragonfly, it takes `37.852s` to download the +`alpine:3.19` image. + +### Containerd pull image hits the cache of remote peer {#containerd-pull-image-hits-the-cache-of-remote-peer} + +Delete the client whose Node is `kind-worker` to clear the cache of Dragonfly local Peer. + + + +```shell +# Find pod name. +export POD_NAME=$(kubectl get pods --namespace dragonfly-system -l "app=dragonfly,release=dragonfly,component=client" -o=jsonpath='{.items[?(@.spec.nodeName=="kind-worker")].metadata.name}' | head -n 1 ) + +# Delete pod. 
+kubectl delete pod ${POD_NAME} -n dragonfly-system +``` + + + +Delete `alpine:3.19` image in `kind-worker` node: + +```shell +docker exec -i kind-worker /usr/local/bin/crictl rmi alpine:3.19 +``` + +Pull `alpine:3.19` image in `kind-worker` node: + +```shell +time docker exec -i kind-worker /usr/local/bin/crictl pull alpine:3.19 +``` + +When pull image hits cache of remote peer, it takes `6.942s` to download the +`alpine:3.19` image. + +### Containerd pull image hits the cache of local peer {#containerd-pull-image-hits-the-cache-of-local-peer} + +Delete `alpine:3.19` image in `kind-worker` node: + +```shell +docker exec -i kind-worker /usr/local/bin/crictl rmi alpine:3.19 +``` + +Pull `alpine:3.19` image in `kind-worker` node: + +```shell +time docker exec -i kind-worker /usr/local/bin/crictl pull alpine:3.19 +``` + +When pull image hits cache of local peer, it takes `5.540s` to download the +`alpine:3.19` image. + +## Preheat image {#preheat-image} + +Expose manager's port `8080`: + +```shell +kubectl --namespace dragonfly-system port-forward service/dragonfly-manager 8080:8080 +``` + +Please create personal access Token before calling Open API, and select `job` for access scopes, refer to [personal-access-tokens](../../advanced-guides/personal-access-tokens.md). + +Use Open API to preheat the image `alpine:3.19` to Seed Peer, refer to [preheat](../../advanced-guides/preheat.md). 
+ +```shell +curl --location --request POST 'http://127.0.0.1:8080/oapi/v1/jobs' \ +--header 'Content-Type: application/json' \ +--header 'Authorization: Bearer your_personal_access_token' \ +--data-raw '{ + "type": "preheat", + "args": { + "type": "image", + "url": "https://index.docker.io/v2/library/alpine/manifests/3.19", + "filteredQueryParams": "Expires&Signature", + "username": "your_registry_username", + "password": "your_registry_password" + } +}' +``` + +The command-line log returns the preheat job id: + +```json +{ + "id": 1, + "created_at": "2024-04-18T08:51:55Z", + "updated_at": "2024-04-18T08:51:55Z", + "task_id": "group_2717f455-ff0a-435f-a3a7-672828d15a2a", + "type": "preheat", + "state": "SUCCESS", + "args": { + "filteredQueryParams": "Expires&Signature", + "headers": null, + "password": "", + "pieceLength": 4194304, + "platform": "", + "tag": "", + "type": "image", + "url": "https://index.docker.io/v2/library/alpine/manifests/3.19", + "username": "" + }, + "scheduler_clusters": [ + { + "id": 1, + "created_at": "2024-04-18T08:29:15Z", + "updated_at": "2024-04-18T08:29:15Z", + "name": "cluster-1" + } + ] +} +``` + +Polling the preheating status with job id: + +```shell +curl --request GET 'http://127.0.0.1:8080/oapi/v1/jobs/1' \ +--header 'Content-Type: application/json' \ +--header 'Authorization: Bearer your_personal_access_token' +``` + +If the status is `SUCCESS`, the preheating is successful: + +```json +{ + "id": 1, + "created_at": "2024-04-18T08:51:55Z", + "updated_at": "2024-04-18T08:51:55Z", + "task_id": "group_2717f455-ff0a-435f-a3a7-672828d15a2a", + "type": "preheat", + "state": "PENDING", + "args": { + "filteredQueryParams": "Expires&Signature", + "headers": null, + "password": "", + "pieceLength": 4194304, + "platform": "", + "tag": "", + "type": "image", + "url": "https://index.docker.io/v2/library/alpine/manifests/3.19", + "username": "" + }, + "scheduler_clusters": [ + { + "id": 1, + "created_at": "2024-04-18T08:29:15Z", + 
"updated_at": "2024-04-18T08:29:15Z", + "name": "cluster-1" + } + ] +} +``` + +Pull `alpine:3.19` image in `kind-worker` node: + +```shell +time docker exec -i kind-worker /usr/local/bin/crictl pull alpine:3.19 +``` + +When pull image hits preheat cache, it takes `2.952s` to download the +`alpine:3.19` image. diff --git a/versioned_docs/version-v2.2.0/getting-started/quick-start/multi-cluster-kubernetes.md b/versioned_docs/version-v2.2.0/getting-started/quick-start/multi-cluster-kubernetes.md new file mode 100644 index 00000000..e3a86171 --- /dev/null +++ b/versioned_docs/version-v2.2.0/getting-started/quick-start/multi-cluster-kubernetes.md @@ -0,0 +1,532 @@ +--- +id: multi-cluster-kubernetes +title: Multi-cluster Kubernetes +description: Multi-cluster kubernetes +slug: /getting-started/quick-start/multi-cluster-kubernetes/ +--- + +Documentation for deploying Dragonfly on multi-cluster kubernetes using helm. A Dragonfly cluster manages cluster within +a network. If you have two clusters with disconnected networks, you can use two Dragonfly clusters to manage their own clusters. + +The recommended deployment in a multi-cluster kubernetes is to use a Dragonfly cluster to manage a kubernetes cluster, +and use a centralized manager service to manage multiple Dragonfly clusters. Because peer can only transmit data in +its own Dragonfly cluster, if a kubernetes cluster deploys a Dragonfly cluster, then a kubernetes cluster forms a p2p network, +and internal peers can only schedule and transmit data in a kubernetes cluster. + +![multi-cluster-kubernetes](../../resource/getting-started/multi-cluster-kubernetes.png) + +## Runtime + +You can have a quick start following [Helm Charts](../installation/helm-charts.md). +It is recommended to use `containerd`. 
+ +| Runtime | Version | Document | +| ---------- | -------- | --------------------------------------------------------------------- | +| containerd | v1.1.0+ | [Link](../../operations/integrations/container-runtime/containerd.md) | +| Docker | v20.0.1+ | [Link](../../operations/integrations/container-runtime/docker.md) | +| CRI-O | All | [Link](../../operations/integrations/container-runtime/cri-o.md) | + +## Setup kubernetes cluster + +[Kind](https://kind.sigs.k8s.io/) is recommended if no Kubernetes cluster is available for testing. + +Create kind cluster configuration file `kind-config.yaml`, configuration content is as follows: + +```yaml +kind: Cluster +apiVersion: kind.x-k8s.io/v1alpha4 +nodes: + - role: control-plane + - role: worker + extraPortMappings: + - containerPort: 30950 + hostPort: 8080 + labels: + cluster: a + - role: worker + labels: + cluster: a + - role: worker + labels: + cluster: b + - role: worker + labels: + cluster: b +``` + +Create cluster using the configuration file: + +```shell +kind create cluster --config kind-config.yaml +``` + +Switch the context of kubectl to kind cluster A: + +```shell +kubectl config use-context kind-kind +``` + +## Kind loads Dragonfly image {#kind-loads-dragonfly-image} + +Pull Dragonfly latest images: + +```shell +docker pull dragonflyoss/scheduler:latest +docker pull dragonflyoss/manager:latest +docker pull dragonflyoss/client:latest +docker pull dragonflyoss/dfinit:latest +``` + +Kind cluster loads Dragonfly latest images: + +```shell +kind load docker-image dragonflyoss/scheduler:latest +kind load docker-image dragonflyoss/manager:latest +kind load docker-image dragonflyoss/client:latest +kind load docker-image dragonflyoss/dfinit:latest +``` + +## Create Dragonfly cluster A + +Create Dragonfly cluster A, the schedulers, seed peers, peers and centralized manager included in +the cluster should be installed using helm. 
+ +### Create Dragonfly cluster A based on helm charts + +Create Dragonfly cluster A charts configuration file `charts-config-cluster-a.yaml`, configuration content is as follows: + +```yaml +manager: + nodeSelector: + cluster: a + image: + repository: dragonflyoss/manager + tag: latest + metrics: + enable: true + config: + verbose: true + pprofPort: 18066 + +scheduler: + nodeSelector: + cluster: a + image: + repository: dragonflyoss/scheduler + tag: latest + metrics: + enable: true + config: + verbose: true + pprofPort: 18066 + +seedClient: + nodeSelector: + cluster: a + image: + repository: dragonflyoss/client + tag: latest + metrics: + enable: true + config: + verbose: true + +client: + nodeSelector: + cluster: a + image: + repository: dragonflyoss/client + tag: latest + metrics: + enable: true + config: + verbose: true + + dfinit: + enable: true + image: + repository: dragonflyoss/dfinit + tag: latest + config: + containerRuntime: + containerd: + configPath: /etc/containerd/config.toml + registries: + - hostNamespace: docker.io + serverAddr: https://index.docker.io + capabilities: ['pull', 'resolve'] +``` + +Create Dragonfly cluster A using the configuration file: + + + +```shell +$ helm repo add dragonfly https://dragonflyoss.github.io/helm-charts/ +$ helm install --wait --create-namespace --namespace cluster-a dragonfly dragonfly/dragonfly -f charts-config-cluster-a.yaml +NAME: dragonfly +LAST DEPLOYED: Tue Apr 16 16:12:42 2024 +NAMESPACE: cluster-a +STATUS: deployed +REVISION: 1 +TEST SUITE: None +NOTES: +1. 
Get the scheduler address by running these commands: + export SCHEDULER_POD_NAME=$(kubectl get pods --namespace cluster-a -l "app=dragonfly,release=dragonfly,component=scheduler" -o jsonpath={.items[0].metadata.name}) + export SCHEDULER_CONTAINER_PORT=$(kubectl get pod --namespace cluster-a $SCHEDULER_POD_NAME -o jsonpath="{.spec.containers[0].ports[0].containerPort}") + kubectl --namespace cluster-a port-forward $SCHEDULER_POD_NAME 8002:$SCHEDULER_CONTAINER_PORT + echo "Visit http://127.0.0.1:8002 to use your scheduler" + +2. Get the dfdaemon port by running these commands: + export DFDAEMON_POD_NAME=$(kubectl get pods --namespace cluster-a -l "app=dragonfly,release=dragonfly,component=dfdaemon" -o jsonpath={.items[0].metadata.name}) + export DFDAEMON_CONTAINER_PORT=$(kubectl get pod --namespace cluster-a $DFDAEMON_POD_NAME -o jsonpath="{.spec.containers[0].ports[0].containerPort}") + You can use $DFDAEMON_CONTAINER_PORT as a proxy port in Node. + +3. Configure runtime to use dragonfly: + https://d7y.io/docs/getting-started/quick-start/kubernetes/ +``` + + + +Check that Dragonfly cluster A is deployed successfully: + +```shell +$ kubectl get po -n cluster-a +NAME READY STATUS RESTARTS AGE +dragonfly-client-5gvz7 1/1 Running 0 51m +dragonfly-client-xvqmq 1/1 Running 0 51m +dragonfly-manager-dc6dcf87b-l88mr 1/1 Running 0 51m +dragonfly-mysql-0 1/1 Running 0 51m +dragonfly-redis-master-0 1/1 Running 0 51m +dragonfly-redis-replicas-0 1/1 Running 0 51m +dragonfly-redis-replicas-1 1/1 Running 0 48m +dragonfly-redis-replicas-2 1/1 Running 0 39m +dragonfly-scheduler-0 1/1 Running 0 51m +dragonfly-seed-client-0 1/1 Running 0 51m +``` + +### Create NodePort service of the manager REST service + +Create the manager REST service configuration file `manager-rest-svc.yaml`, +configuration content is as follows: + +```yaml +apiVersion: v1 +kind: Service +metadata: + name: manager-rest + namespace: cluster-a +spec: + type: NodePort + ports: + - name: http + nodePort: 30950 + 
port: 8080 + selector: + app: dragonfly + component: manager + release: dragonfly +``` + +Create manager REST service using the configuration file: + +```shell +kubectl apply -f manager-rest-svc.yaml -n cluster-a +``` + +### Visit manager console + +Visit address `localhost:8080` to see the manager console. Sign in the console with the default root user, +the username is `root` and password is `dragonfly`. + +![signin](../../resource/getting-started/signin.png) + +![clusters](../../resource/getting-started/clusters.png) + +By default, Dragonfly will automatically create Dragonfly cluster A record in manager when +it is installed for the first time. You can click Dragonfly cluster A to view the details. + +![cluster-a](../../resource/getting-started/cluster-a.png) + +## Create Dragonfly cluster B + +To create Dragonfly cluster B, you need to create a Dragonfly cluster record in the manager console first, +and the schedulers, seed peers and peers included in the Dragonfly cluster should be installed using helm. + +### Create Dragonfly cluster B in the manager console + +Visit manager console and click the `ADD CLUSTER` button to add Dragonfly cluster B record. +Note that the IDC is set to `cluster-2` to match the peer whose IDC is `cluster-2`. + +![create-cluster-b](../../resource/getting-started/create-cluster-b.png) + +Create Dragonfly cluster B record successfully. + +![create-cluster-b-successfully](../../resource/getting-started/create-cluster-b-successfully.png) + +### Use scopes to distinguish different Dragonfly clusters + +The Dragonfly cluster needs to serve the scope. It will provide scheduler services and +seed peer services to peers in the scope. The scopes of the Dragonfly cluster are configured +when the console is created and updated. The scopes of the peer are configured in peer YAML config, +the fields are `host.idc`, `host.location` and `host.advertiseIP`, +refer to [dfdaemon config](../../reference/configuration/client/dfdaemon.md). 
+ +If the peer scopes match the Dragonfly cluster scopes, then the peer will use +the Dragonfly cluster's scheduler and seed peer first, and if there is no matching +Dragonfly cluster then use the default Dragonfly cluster. + +**Location**: The Dragonfly cluster needs to serve all peers in the location. When the location in +the peer configuration matches the location in the Dragonfly cluster, the peer will preferentially +use the scheduler and the seed peer of the Dragonfly cluster. It is separated by "|", +for example "area|country|province|city". + +**IDC**: The Dragonfly cluster needs to serve all peers in the IDC. When the IDC in the peer +configuration matches the IDC in the Dragonfly cluster, the peer will preferentially use the +scheduler and the seed peer of the Dragonfly cluster. IDC has higher priority than location +in the scopes. + +**CIDRs**: The Dragonfly cluster needs to serve all peers in the CIDRs. The advertise IP will be reported in the peer +configuration when the peer is started, and if the advertise IP is empty in the peer configuration, +peer will automatically get expose IP as advertise IP. When advertise IP of the peer matches the CIDRs in Dragonfly cluster, +the peer will preferentially use the scheduler and the seed peer of the Dragonfly cluster. +CIDRs has higher priority than IDC in the scopes. + +**Hostnames**: The cluster needs to serve all peers in Hostnames. The input parameter is the multiple Hostnames regexes. +The Hostnames will be reported in the peer configuration when the peer is started. +When the Hostnames matches the multiple Hostnames regexes in the cluster, +the peer will preferentially use the scheduler and the seed peer of the cluster. +Hostnames has higher priority than IDC in the scopes. +Hostnames has priority equal to CIDRs in the scopes. + +### Create Dragonfly cluster B based on helm charts + +Create charts configuration with cluster information in the manager console. 
+ +![cluster-b-information](../../resource/getting-started/cluster-b-information.png) + +- `Scheduler.config.manager.schedulerClusterID` using the `Scheduler cluster ID` + from `cluster-2` information in the manager console. +- `Scheduler.config.manager.addr` is address of the manager GRPC server. +- `seedClient.config.seedPeer.clusterID` using the `Seed peer cluster ID` + from `cluster-2` information in the manager console. +- `seedClient.config.manager.addrs` is address of the manager GRPC server. +- `client.config.host.idc` using the `IDC` from `cluster-2` information in the manager console. +- `client.config.manager.addrs` is address of the manager GRPC server. +- `externalManager.host` is host of the manager GRPC server. +- `externalRedis.addrs[0]` is address of the redis. + +Create Dragonfly cluster B charts configuration file `charts-config-cluster-b.yaml`, +configuration content is as follows: + +```yaml +scheduler: + nodeSelector: + cluster: b + image: + repository: dragonflyoss/scheduler + tag: latest + metrics: + enable: true + config: + verbose: true + pprofPort: 18066 + manager: + addr: dragonfly-manager.cluster-a.svc.cluster.local:65003 + schedulerClusterID: 2 + +seedClient: + nodeSelector: + cluster: b + image: + repository: dragonflyoss/client + tag: latest + metrics: + enable: true + config: + verbose: true + manager: + addr: http://dragonfly-manager.cluster-a.svc.cluster.local:65003 + seedPeer: + clusterID: 2 + +client: + nodeSelector: + cluster: b + image: + repository: dragonflyoss/client + tag: latest + metrics: + enable: true + config: + verbose: true + dfinit: + enable: true + image: + repository: dragonflyoss/dfinit + tag: latest + config: + containerRuntime: + containerd: + configPath: /etc/containerd/config.toml + registries: + - hostNamespace: docker.io + serverAddr: https://index.docker.io + capabilities: ['pull', 'resolve'] + config: + host: + idc: cluster-2 + manager: + addr: http://dragonfly-manager.cluster-a.svc.cluster.local:65003 + 
+manager: + enable: false + +externalManager: + enable: true + host: dragonfly-manager.cluster-a.svc.cluster.local + restPort: 8080 + grpcPort: 65003 + +redis: + enable: false + +externalRedis: + addrs: + - dragonfly-redis-master.cluster-a.svc.cluster.local:6379 + password: dragonfly + +mysql: + enable: false +``` + +Create Dragonfly cluster B using the configuration file: + + + +```shell +$ helm install --wait --create-namespace --namespace cluster-b dragonfly dragonfly/dragonfly -f charts-config-cluster-b.yaml +NAME: dragonfly +LAST DEPLOYED: Tue Apr 16 15:49:42 2024 +NAMESPACE: cluster-b +STATUS: deployed +REVISION: 1 +TEST SUITE: None +NOTES: +1. Get the scheduler address by running these commands: + export SCHEDULER_POD_NAME=$(kubectl get pods --namespace cluster-b -l "app=dragonfly,release=dragonfly,component=scheduler" -o jsonpath={.items[0].metadata.name}) + export SCHEDULER_CONTAINER_PORT=$(kubectl get pod --namespace cluster-b $SCHEDULER_POD_NAME -o jsonpath="{.spec.containers[0].ports[0].containerPort}") + kubectl --namespace cluster-b port-forward $SCHEDULER_POD_NAME 8002:$SCHEDULER_CONTAINER_PORT + echo "Visit http://127.0.0.1:8002 to use your scheduler" + +2. Get the dfdaemon port by running these commands: + export DFDAEMON_POD_NAME=$(kubectl get pods --namespace cluster-b -l "app=dragonfly,release=dragonfly,component=dfdaemon" -o jsonpath={.items[0].metadata.name}) + export DFDAEMON_CONTAINER_PORT=$(kubectl get pod --namespace cluster-b $DFDAEMON_POD_NAME -o jsonpath="{.spec.containers[0].ports[0].containerPort}") + You can use $DFDAEMON_CONTAINER_PORT as a proxy port in Node. + +3. 
Configure runtime to use dragonfly: + https://d7y.io/docs/getting-started/quick-start/kubernetes/ +``` + + + +Check that Dragonfly cluster B is deployed successfully: + +```shell +$ kubectl get po -n cluster-b +NAME READY STATUS RESTARTS AGE +dragonfly-client-f4897 1/1 Running 0 10m +dragonfly-client-m9k9f 1/1 Running 0 10m +dragonfly-scheduler-0 1/1 Running 0 10m +dragonfly-seed-client-0 1/1 Running 0 10m +``` + +Create dragonfly cluster B successfully. + +![install-cluster-b-successfully](../../resource/getting-started/install-cluster-b-successfully.png) + +## Using Dragonfly to distribute images for multi-cluster kubernetes + +### Containerd pull image back-to-source for the first time through Dragonfly in cluster A + +Pull `alpine:3.19` image in `kind-worker` node: + +```shell +time docker exec -i kind-worker /usr/local/bin/crictl pull alpine:3.19 +``` + +When pull image back-to-source for the first time through Dragonfly, peer uses `cluster-a`'s scheduler and seed peer. +It takes `31.714s` to download the `alpine:3.19` image. + +### Containerd pull image hits the cache of remote peer in cluster A + +Delete the client whose Node is `kind-worker` to clear the cache of Dragonfly local Peer. + + + +```shell +# Find pod name. +export POD_NAME=$(kubectl get pods --namespace cluster-a -l "app=dragonfly,release=dragonfly,component=client" -o=jsonpath='{.items[?(@.spec.nodeName=="kind-worker")].metadata.name}' | head -n 1 ) + +# Delete pod. +kubectl delete pod ${POD_NAME} -n cluster-a +``` + + + +Delete `alpine:3.19` image in `kind-worker` node: + +```shell +docker exec -i kind-worker /usr/local/bin/crictl rmi alpine:3.19 +``` + +Pull `alpine:3.19` image in `kind-worker` node: + +```shell +time docker exec -i kind-worker /usr/local/bin/crictl pull alpine:3.19 +``` + +When pull image hits cache of remote peer, peer uses `cluster-a`'s scheduler and seed peer. +It takes `7.304s` to download the `alpine:3.19` image. 
+ +### Containerd pull image back-to-source for the first time through dragonfly in cluster B + +Pull `alpine:3.19` image in `kind-worker3` node: + +```shell +time docker exec -i kind-worker3 /usr/local/bin/crictl pull alpine:3.19 +``` + +When pull image back-to-source for the first time through Dragonfly, peer uses `cluster-b`'s scheduler and seed peer. +It takes `36.208s` to download the `alpine:3.19` image. + +### Containerd pull image hits the cache of remote peer in cluster B + +Delete the client whose Node is `kind-worker3` to clear the cache of Dragonfly local Peer. + + + +```shell +# Find pod name. +export POD_NAME=$(kubectl get pods --namespace cluster-b -l "app=dragonfly,release=dragonfly,component=client" -o=jsonpath='{.items[?(@.spec.nodeName=="kind-worker3")].metadata.name}' | head -n 1 ) + +# Delete pod. +kubectl delete pod ${POD_NAME} -n cluster-b +``` + + + +Delete `alpine:3.19` image in `kind-worker3` node: + +```shell +docker exec -i kind-worker3 /usr/local/bin/crictl rmi alpine:3.19 +``` + +Pull `alpine:3.19` image in `kind-worker3` node: + +```shell +time docker exec -i kind-worker3 /usr/local/bin/crictl pull alpine:3.19 +``` + +When pull image hits cache of remote peer, peer uses `cluster-b`'s scheduler and seed peer. +It takes `6.963s` to download the `alpine:3.19` image. diff --git a/versioned_docs/version-v2.2.0/introduction.md b/versioned_docs/version-v2.2.0/introduction.md new file mode 100644 index 00000000..d6d0ea88 --- /dev/null +++ b/versioned_docs/version-v2.2.0/introduction.md @@ -0,0 +1,66 @@ +--- +id: introduction +title: Introduction +description: Dragonfly is an file distribution and image acceleration based on p2p technology.It is designed to increase the efficiency of large-scale data distribution and improve idle bandwidth usage of peer. It is widely used in various domains such as image acceleration, file distribution, AI model distribution, AI dataset distribution, etc. 
+slug: / +--- + +Dragonfly is a file distribution and image acceleration system based on P2P technology. +It is designed to increase the efficiency of large-scale data distribution and improve idle bandwidth usage of peers. +It is widely used in various domains such as image acceleration, file distribution, +AI model distribution, AI dataset distribution, etc. + +## Features {#features} + +Here are some of the features that Dragonfly offers: + +- **P2P technology**: Based on P2P technology, use the idle bandwidth of Peer to improve download speed. +- **Non-intrusive**: Non-intrusive support for multiple container runtimes, download tools, AI infrastructure, etc. +- **Peer configuration**: Load limit, concurrent limit, traffic limit, etc. can be configured. +- **Consistency**: Ensures downloaded files are consistent even if the user does not check for consistency. +- **Exception isolation**: Isolate exceptions based on Service level, Peer level and Task level to improve + download stability. +- **Ecosystem**: Provides simple integration with AI infrastructure, container runtimes, container registry, + download tools, etc. + +![features](./resource/introduction/features.jpeg) + +## Milestones {#milestones} + +[Dragonfly 1.x](https://github.com/dragonflyoss/Dragonfly) has been open source since November 2017 and used in production +environments by many companies. +It joined the CNCF as a sandbox project in October 2018. +In April 2020, the CNCF Technical Oversight Committee (TOC) voted to accept Dragonfly as an Incubating Project. +In April 2021, Dragonfly 2.0 was released after architectural optimization and code refactoring. + +![milestone](./resource/introduction/milestone.jpeg) + +## Architecture + +Dragonfly services can be divided into four categories: Manager, Scheduler, Seed Peer and Peer. Please refer to [Architecture](./operations/deployment/architecture.md). 
+ +- **Manager**: Maintain the relationship between each P2P cluster, + It primarily offers functions such as dynamic configuration management and data collection. + It also includes a front-end console, enabling users to visually operate and manage the cluster. +- **Scheduler**: Select the best download parent node for the download node. At the appropriate time, + trigger Seed Peer to perform back-to-source downloading, or Peer to perform back-to-source downloading. +- **Seed Peer**: Provides upload and download functions and can serve as a root node in the P2P network, + allowing the Scheduler to actively initiate back-to-source. +- **Peer**: Provides upload and download functions. + +![arch](./resource/concepts/arch.png) + +## How it works + +When downloading an image or file, the download request is proxied to Dragonfly via the Peer HTTP Proxy. +Peer will first register the Task with the Scheduler, and the Scheduler will check the Task metadata +to determine whether the Task is downloaded for the first time in the P2P cluster. +If this is the first time downloading, the Seed Peer will be triggered to download back-to-source, +and the Task will be divided based on the piece level. +After successful registration, The peer establishes a connection to the scheduler based on this task, +and then schedule the Seed Peer to the Peer for streaming based on piece level. +when a piece is successfully downloaded, the piece metadata will be reported to the Scheduler for next scheduling. +If this is not the first time downloading, the Scheduler will schedule other Peers for the download. +The Peer will download pieces from different Peers, splices and returns the entire file, then the P2P download is completed. 
+ +![sequence-diagram](./resource/getting-started/sequence-diagram.png) diff --git a/versioned_docs/version-v2.2.0/operations/best-practices/deployment-best-practices.md b/versioned_docs/version-v2.2.0/operations/best-practices/deployment-best-practices.md new file mode 100644 index 00000000..a6717ac7 --- /dev/null +++ b/versioned_docs/version-v2.2.0/operations/best-practices/deployment-best-practices.md @@ -0,0 +1,211 @@ +--- +id: deployment-best-practices +title: Deployment Best Practices +slug: /operations/best-practices/deployment-best-practices/ +--- + +Documentation for capacity planning and performance tuning for Dragonfly. + +## Capacity Planning + +A big factor in planning capacity is the highest expected storage capacity. +Also know the memory size, CPU core count, and disk capacity of each machine. + +For predicting your capacity, you can use the estimates from below if you don’t have your capacity plan. + +### Manager + +The resources required to deploy the Manager depend on the total number of peers. + +> Run a minimum of 3 replicas. + + + +| Total Number of Peers | CPU | Memory | Disk | +| --------------------- | --- | ------ | ----- | +| 1K | 8C | 16G | 200Gi | +| 5K | 16C | 32G | 200Gi | +| 10K | 16C | 64G | 200Gi | + + + +### Scheduler + +The resources required to deploy the Scheduler depend on the request per second. + +> Run a minimum of 3 replicas. + + + +| Request Per Second | CPU | Memory | Disk | +| ------------------ | --- | ------ | ----- | +| 1K | 8C | 16G | 200Gi | +| 3K | 16C | 32G | 200Gi | +| 5K | 32C | 64G | 200Gi | + + + +### Client + + + +The resources required to deploy the Client depend on the request per second. + +> If it is a Seed Peer, run a minimum of 3 replicas. Disk is calculated based on file storage capacity. 
+ +| Request Per Second | CPU | Memory | Disk | +| ------------------ | --- | ------ | ----- | +| 500 | 8C | 16G | 500Gi | +| 1K | 8C | 16G | 3Ti | +| 3K | 16C | 32G | 5Ti | +| 5K | 32C | 64G | 10Ti | + + + +### Cluster + +The resources required to deploy each service in a P2P cluster depends on the total number of Peers. + + + +| Total Number of Peers | Manager | Scheduler | Seed Peer | Peer | +| --------------------- | ------------------ | ------------------ | ----------------- | ----------- | +| 500 | 4C/8G/200Gi \* 3 | 8C/16G/200Gi \* 3 | 8C/16G/1Ti \* 3 | 4C/8G/500Gi | +| 1K | 8C/16G/200Gi \* 3 | 8C/16G/200Gi \* 3 | 8C/16G/3Ti \* 3 | 4C/8G/500Gi | +| 3K | 16C/32G/200Gi \* 3 | 16C/32G/200Gi \* 3 | 16C/32G/5Ti \* 3 | 4C/8G/500Gi | +| 5K | 16C/64G/200Gi \* 3 | 32C/64G/200Gi \* 3 | 32C/64G/10Ti \* 3 | 4C/8G/500Gi | + + + +## Performance tuning + +The following documentation may help you to achieve better performance especially for large scale runs. + +### Rate limits + +#### Outbound Bandwidth + +Used for node P2P to share piece bandwidth. +If the peak bandwidth is greater than the default outbound bandwidth, +you can set `rateLimit` higher to increase the upload speed. +It is recommended that the configuration be the same as the inbound bandwidth of the machine. +Please refer to [dfdaemon config](../../reference/configuration/client/dfdaemon.md). + +```yaml +upload: + # -- rateLimit is the default rate limit of the upload speed in KiB/MiB/GiB per second, default is 10GiB/s. + rateLimit: 10GiB +``` + +#### Inbound Bandwidth + +Used for node back-to-source bandwidth and download bandwidth from remote peer. +If the peak bandwidth is greater than the default inbound bandwidth, +`rateLimit` can be set higher to increase download speed. +It is recommended that the configuration be the same as the outbound bandwidth of the machine. +Please refer to [dfdaemon config](../../reference/configuration/client/dfdaemon.md). 
+ +```yaml +download: + # -- rateLimit is the default rate limit of the download speed in KiB/MiB/GiB per second, default is 10GiB/s. + rateLimit: 10GiB +``` + +### Concurrency control + +When downloading a single task on a node, this controls +the number of concurrent piece downloads back-to-source and the number of concurrent piece downloads from remote peers. +The larger the number of piece concurrency, the faster the task download, and the more CPU and memory will be consumed. +Users should adjust the number of piece concurrency according to the actual situation, +and adjust the client’s CPU and memory configuration accordingly. +Please refer to [dfdaemon config](../../reference/configuration/client/dfdaemon.md). + +```yaml +download: + # -- concurrentPieceCount is the number of concurrent pieces to download. + concurrentPieceCount: 10 +``` + +### GC + +Used for task cache GC in node disk, taskTTL is calculated based on cache time. +To avoid cases where GC would be problematic or potentially catastrophic, +it is recommended to use the default value. +Please refer to [dfdaemon config](../../reference/configuration/client/dfdaemon.md). + +```yaml +gc: + # interval is the interval to do gc. + interval: 900s + policy: + # taskTTL is the ttl of the task. + taskTTL: 21600s + # distHighThresholdPercent is the high threshold percent of the disk usage. + # If the disk usage is greater than the threshold, dfdaemon will do gc. + distHighThresholdPercent: 80 + # distLowThresholdPercent is the low threshold percent of the disk usage. + # If the disk usage is less than the threshold, dfdaemon will stop gc. + distLowThresholdPercent: 60 +``` + +### Nydus + +When Nydus downloads a file, it splits the file into 1MB chunks and loads them on demand. +Use Seed Peer HTTP proxy as Nydus cache service, +use P2P transmission method to reduce back-to-source requests and back-to-source traffic, +and improve download speed. 
+When Dragonfly is used as a cache service for Nydus, the configuration needs to be optimized. + +**1.** `proxy.rules.regex` matches the Nydus repository URL, +intercepts download traffic and forwards it to the P2P network. +Please refer to [dfdaemon config](../../reference/configuration/client/dfdaemon.md). + +```yaml +proxy: + # rules is the list of rules for the proxy server. + # regex is the regex of the request url. + # useTLS indicates whether use tls for the proxy backend. + # redirect is the redirect url. + # filteredQueryParams is the filtered query params to generate the task id. + # When filter is ["Signature", "Expires", "ns"], for example: + # http://example.com/xyz?Expires=e1&Signature=s1&ns=docker.io and http://example.com/xyz?Expires=e2&Signature=s2&ns=docker.io + # will generate the same task id. + # Default value includes the filtered query params of s3, gcs, oss, obs, cos. + rules: + - regex: 'blobs/sha256.*' + # useTLS: false + # redirect: "" + # filteredQueryParams: [] +``` + +**2.** Change `Seed Peer Load Limit` to 10000 or higher to improve the P2P cache hit rate between Seed Peers. + +Click the `UPDATE CLUSTER` button to change the `Seed Peer Load Limit` to 10000. +Please refer to [update-cluster](../../advanced-guides/web-console/cluster.md#update-cluster). + +![update-cluster](../../resource/operations/best-practices/deployment-best-practices/update-cluster.png) + +Changed `Seed Peer Load Limit` successfully. + +![cluster](../../resource/operations/best-practices/deployment-best-practices/cluster.png) + +**3.** Nydus will initiate an HTTP range request of about 1MB to achieve on-demand loading. +When prefetch enabled, the Seed Peer can prefetch the complete resource after receiving the HTTP range request, +improving the cache hit rate. +Please refer to [dfdaemon config](../../reference/configuration/client/dfdaemon.md). + +```yaml +proxy: + # prefetch pre-downloads full of the task when download with range request. 
+ prefetch: true +``` + +**4.** When the download speed is slow, +you can adjust the `readBufferSize` value of proxy to 64KB in order to reduce the proxy request time. +Please refer to [dfdaemon config](../../reference/configuration/client/dfdaemon.md). + +```yaml +proxy: + # -- readBufferSize is the buffer size for reading piece from disk, default is 32KB. + readBufferSize: 32768 +``` diff --git a/versioned_docs/version-v2.2.0/operations/best-practices/observability/metrics.md b/versioned_docs/version-v2.2.0/operations/best-practices/observability/metrics.md new file mode 100644 index 00000000..1997b70b --- /dev/null +++ b/versioned_docs/version-v2.2.0/operations/best-practices/observability/metrics.md @@ -0,0 +1,87 @@ +--- +id: metrics +title: Prometheus Metrics +slug: /operations/best-practices/observability/prometheus-metrics/ +--- + +This doc contains all the metrics that Dragonfly components currently support. +Now we support metrics for Client, Seed Client, Manager and Scheduler. +The metrics path is fixed to `/metrics`. The following metrics are exported. + +## Client{#client} + +GRPC metrics are exposed via [go-grpc-prometheus](https://github.com/grpc-ecosystem/go-grpc-prometheus). + + + +| Name | Labels | Type | Description | +| :---------------------------------- | :-------------------------------------------- | :-------- | :------------------------------------------------------ | +| download_task_total | type, tag, app, priority | counter | Counter of the number of the download task. | +| download_task_failure_total | type, tag, app, priority | counter | Counter of the number of failed of the download task. | +| prefetch_task_total | type, tag, app, priority | counter | Counter of the number of the prefetch task. | +| prefetch_task_failure_total | type, tag, app, priority | counter | Counter of the number of failed of the prefetch task. 
| +| concurrent_download_task_total | type, tag, app, priority | gauge | Gauge of the number of concurrent of the download task. | +| concurrent_upload_piece_total | | gauge | Gauge of the number of concurrent of the upload piece. | +| download_traffic | type | counter | Counter of the number of the download traffic. | +| upload_traffic | | counter | Counter of the number of the upload traffic. | +| download_task_duration_milliseconds | task_size_level | histogram | Histogram of the download task duration. | +| version | git_version, git_commit, platform, build_time | gauge | Version info of the service. | + + + +## Manager {#manager} + +GRPC metrics are exposed via [go-grpc-prometheus](https://github.com/grpc-ecosystem/go-grpc-prometheus). + + + +| Name | Labels | Type | Description | +| :------------------------------------------------------- | :------------------------------------------------------------------------------------------- | :------ | :-------------------------------------------------------------- | +| dragonfly_manager_peer_total | version, commit | gauge | Gauge of the number of peer. | +| dragonfly_manager_search_scheduler_cluster_total | version, commit | counter | Counter of the number of searching scheduler cluster. | +| dragonfly_manager_search_scheduler_cluster_failure_total | version, commit | counter | Counter of the number of failed of searching scheduler cluster. | +| dragonfly_manager_version | major, minor, git_version, git_commit, platform, build_time, go_version, go_tags, go_gcflags | gauge | Version info of the service. | + + + +## Scheduler {#scheduler} + +GRPC metrics are exposed via [go-grpc-prometheus](https://github.com/grpc-ecosystem/go-grpc-prometheus). 
+ + + +| Name | Labels | Type | Description | +| :----------------------------------------------------------------------- | :------------------------------------------------------------------------------------------------------------------- | :------ | :--------------------------------------------------------------------------- | +| dragonfly_scheduler_announce_peer_total | | counter | Counter of the number of the announcing peer. | +| dragonfly_scheduler_announce_peer_failure_total | | counter | Counter of the number of failed of the announcing peer. | +| dragonfly_scheduler_stat_peer_total | | counter | Counter of the number of the stat peer. | +| dragonfly_scheduler_stat_peer_failure_total | | counter | Counter of the number of failed of the stat peer. | +| dragonfly_scheduler_leave_peer_total | | counter | Counter of the number of the leaving peer. | +| dragonfly_scheduler_leave_peer_failure_total | | counter | Counter of the number of failed of the leaving peer. | +| dragonfly_scheduler_exchange_peer_total | | counter | Counter of the number of the exchanging peer. | +| dragonfly_scheduler_exchange_peer_failure_total | | counter | Counter of the number of the exchanging peer. | +| dragonfly_scheduler_register_peer_total | priority, task_type, task_tag, task_app, host_type | counter | Counter of the number of the register peer. | +| dragonfly_scheduler_register_peer_failure_total | priority, task_type, task_tag, task_app, host_type | counter | Counter of the number of failed of the register peer. | +| dragonfly_scheduler_download_peer_started_total | priority, task_type, task_tag, task_app, host_type | counter | Counter of the number of the download peer started. | +| dragonfly_scheduler_download_peer_started_failure_total | priority, task_type, task_tag, task_app, host_type | counter | Counter of the number of failed of the download peer started. 
| +| dragonfly_scheduler_download_peer_back_to_source_started_total | priority, task_type, task_tag, task_app, host_type | counter | Counter of the number of the download peer back-to-source started. | +| dragonfly_scheduler_download_peer_back_to_source_started_failure_total | priority, task_type, task_tag, task_app, host_type | counter | Counter of the number of failed of the download peer back-to-source started. | +| dragonfly_scheduler_download_peer_finished_total | priority, task_type, task_tag, task_app, host_type | counter | Counter of the number of the download peer. | +| dragonfly_scheduler_download_peer_finished_failure_total | priority, task_type, task_tag, task_app, host_type | counter | Counter of the number of failed of the download peer. | +| dragonfly_scheduler_download_peer_back_to_source_finished_failure_total | priority, task_type, task_tag, task_app, host_type | counter | Counter of the number of failed of the download peer back-to-source. | +| dragonfly_scheduler_download_piece_finished_total | traffic_type, task_type, task_tag, task_app, host_type | counter | Counter of the number of the download piece. | +| dragonfly_scheduler_download_piece_finished_failure_total | traffic_type, task_type, task_tag, task_app, host_type | counter | Counter of the number of failed of the download piece. | +| dragonfly_scheduler_download_piece_back_to_source_finished_failure_total | traffic_type, task_type, task_tag, task_app, host_type | counter | Counter of the number of failed of the download piece back-to-source. | +| dragonfly_scheduler_stat_task_total | | counter | Counter of the number of the stat task. | +| dragonfly_scheduler_stat_task_failure_total | | counter | Counter of the number of failed of the stat task. | +| dragonfly_scheduler_announce_host_total | os, platform, platform_family, platform_version, kernel_version, git_version, git_commit, go_version, build_platform | counter | Counter of the number of the announce host. 
| +| dragonfly_scheduler_announce_host_failure_total | os, platform, platform_family, platform_version, kernel_version, git_version, git_commit, go_version, build_platform | counter | Counter of the number of failed of the announce host. | +| dragonfly_scheduler_leave_host_total | | counter | Counter of the number of the leaving host. | +| dragonfly_scheduler_leave_host_failure_total | | counter | Counter of the number of failed of the leaving host. | +| dragonfly_scheduler_traffic | type, task_type, task_tag, task_app, host_type | counter | Counter of the number of traffic. | +| dragonfly_scheduler_host_traffic | type, task_type, task_tag, task_app, host_type, host_id, host_ip, host_name | counter | Counter of the number of per host traffic. | +| dragonfly_scheduler_download_peer_duration_milliseconds | task_size_level | summary | Summary of the time each peer downloading. | +| dragonfly_scheduler_concurrent_schedule_total | | gauge | Gauge of the number of concurrent of the scheduling. | +| dragonfly_scheduler_version | major, minor, git_version, git_commit, platform, build_time, go_version, go_tags, go_gcflags | counter | Version info of the service. | + + diff --git a/versioned_docs/version-v2.2.0/operations/best-practices/observability/monitoring.md b/versioned_docs/version-v2.2.0/operations/best-practices/observability/monitoring.md new file mode 100644 index 00000000..b4ab509d --- /dev/null +++ b/versioned_docs/version-v2.2.0/operations/best-practices/observability/monitoring.md @@ -0,0 +1,303 @@ +--- +id: monitoring +title: Monitoring +slug: /operations/best-practices/observability/monitoring/ +--- + +Dragonfly is recommending to use [prometheus](https://prometheus.io/) for monitoring. +Prometheus and grafana configurations are maintained in the +[dragonflyoss/monitoring](https://github.com/dragonflyoss/monitoring/) repository. 
+ +Grafana dashboards are published in [grafana.com](https://grafana.com/), +and the address of the dashboards are [Manager](https://grafana.com/grafana/dashboards/15945/), +[Scheduler](https://grafana.com/grafana/dashboards/15944/) and +[Peer](https://grafana.com/grafana/dashboards/15946/). + +The following Dragonfly monitoring example is based on [kubernetes](https://kubernetes.io/), and uses the +[prometheus-community/kube-prometheus-stack](https://artifacthub.io/packages/helm/prometheus-community/kube-prometheus-stack/) +charts to deploy prometheus and grafana. + +## Prerequisites {#prerequisites} + +| Name | Version | Document | +| ------------------ | -------- | --------------------------------------- | +| Kubernetes cluster | 1.20+ | [kubernetes.io](https://kubernetes.io/) | +| Helm | v3.8.0+ | [helm.sh](https://helm.sh/) | +| Prometheus | v2.40.0+ | [prometheus.io](https://prometheus.io/) | + +## Setup kubernetes cluster {#setup-kubernetes-cluster} + +[Kind](https://kind.sigs.k8s.io/) is recommended if no Kubernetes cluster is available for testing. 
+ +Create kind multi-node cluster configuration file `kind-config.yaml`, configuration content is as follows: + +```yaml +kind: Cluster +apiVersion: kind.x-k8s.io/v1alpha4 +nodes: + - role: control-plane + - role: worker + - role: worker +``` + +Create a kind multi-node cluster using the configuration file: + +```shell +kind create cluster --config kind-config.yaml +``` + +Switch the context of kubectl to kind cluster: + +```shell +kubectl config use-context kind-kind +``` + +## Kind loads Dragonfly image {#kind-loads-dragonfly-image} + +Pull Dragonfly latest images: + +```shell +docker pull dragonflyoss/scheduler:latest +docker pull dragonflyoss/manager:latest +docker pull dragonflyoss/client:latest +docker pull dragonflyoss/dfinit:latest +``` + +Kind cluster loads Dragonfly latest images: + +```shell +kind load docker-image dragonflyoss/scheduler:latest +kind load docker-image dragonflyoss/manager:latest +kind load docker-image dragonflyoss/client:latest +kind load docker-image dragonflyoss/dfinit:latest +``` + +## Create Prometheus and Grafana based on Helm Charts{#create-prometheus-and-grafana-based-on-helm-charts} + +Install prometheus and grafana based on [kube-prometheus-stack](https://artifacthub.io/packages/helm/prometheus-community/kube-prometheus-stack) + + + +```bash +$ helm repo add prometheus-community https://prometheus-community.github.io/helm-charts +$ helm install --create-namespace --namespace prometheus prometheus prometheus-community/kube-prometheus-stack -f https://raw.githubusercontent.com/dragonflyoss/monitoring/main/prometheus/values.yaml +NAME: prometheus +LAST DEPLOYED: Tue Jun 11 15:37:56 2024 +NAMESPACE: default +STATUS: deployed +REVISION: 1 +NOTES: +kube-prometheus-stack has been installed. 
Check its status by running: + kubectl --namespace default get pods -l "release=prometheus" + +Visit https://github.com/prometheus-operator/kube-prometheus for instructions on how to create & configure Alertmanager and Prometheus instances using the Operator. +``` + + + +Check that Prometheus is deployed successfully: + +```shell +$ kubectl get po -n prometheus +NAME READY STATUS RESTARTS AGE +alertmanager-prometheus-kube-prometheus-alertmanager-0 2/2 Running 2 (47m ago) 71m +prometheus-grafana-7576556869-jzpsf 3/3 Running 3 (47m ago) 73m +prometheus-kube-prometheus-operator-fd56bbb4f-29sp6 1/1 Running 2 (47m ago) 73m +prometheus-kube-state-metrics-7d7654ff7-7vtrg 1/1 Running 2 (47m ago) 73m +prometheus-prometheus-kube-prometheus-prometheus-0 2/2 Running 2 (47m ago) 71m +prometheus-prometheus-node-exporter-8dl68 1/1 Running 1 (47m ago) 73m +prometheus-prometheus-node-exporter-jlgcp 1/1 Running 1 (47m ago) 73m +prometheus-prometheus-node-exporter-tlhld 1/1 Running 1 (47m ago) 73m +``` + +Expose grafana port 8080 and access the address `localhost:8080` to see the grafana dashboard, +You can login with username `admin` and password `prom-operator` + +```bash +kubectl --namespace prometheus port-forward svc/prometheus-grafana 8080:80 +``` + +- ![grafana-login](../../../resource/operations/best-practices/observability/monitoring/grafana-login.jpg) + +## Create Dragonfly cluster based on helm charts {#create-dragonfly-cluster-based-on-helm-charts} + +Create the Helm Charts configuration file `values.yaml`, Turn on the `ServiceMonitor` function, Please refer to the [serviceMonitor](https://github.com/dragonflyoss/helm-charts/blob/main/charts/dragonfly/values.yaml#L247). 
+ +```yaml +manager: + image: + repository: dragonflyoss/manager + tag: latest + metrics: + enable: true + serviceMonitor: + enable: true + prometheusRule: + enable: true + config: + verbose: true + pprofPort: 18066 + +scheduler: + image: + repository: dragonflyoss/scheduler + tag: latest + metrics: + enable: true + enableHost: true + serviceMonitor: + enable: true + prometheusRule: + enable: true + config: + verbose: true + pprofPort: 18066 + +seedClient: + image: + repository: dragonflyoss/client + tag: latest + metrics: + enable: true + serviceMonitor: + enable: true + prometheusRule: + enable: true + config: + verbose: true + log: + level: info + proxy: + prefetch: true + registryMirror: + addr: https://index.docker.io + rules: + - regex: blobs/sha256.* + +client: + image: + repository: dragonflyoss/client + tag: latest + metrics: + enable: true + serviceMonitor: + enable: true + prometheusRule: + enable: true + config: + verbose: true + dfinit: + enable: true + image: + repository: dragonflyoss/dfinit + tag: latest + config: + containerRuntime: + containerd: + configPath: /etc/containerd/config.toml + registries: + - hostNamespace: docker.io + serverAddr: https://index.docker.io + capabilities: ['pull', 'resolve'] + - hostNamespace: ghcr.io + serverAddr: https://ghcr.io + capabilities: ['pull', 'resolve'] + config: + verbose: true + log: + level: info + proxy: + prefetch: true + registryMirror: + addr: https://index.docker.io + rules: + - regex: blobs/sha256.* +``` + +Create a Dragonfly cluster using the configuration file: + + + +```shell +$ helm repo add dragonfly https://dragonflyoss.github.io/helm-charts/ +$ helm install --create-namespace --namespace dragonfly-system dragonfly dragonfly/dragonfly -f values.yaml +NAME: dragonfly +LAST DEPLOYED: Tue Jun 11 16:12:19 2024 +NAMESPACE: dragonfly-system +STATUS: deployed +REVISION: 1 +TEST SUITE: None +NOTES: +1. 
Get the scheduler address by running these commands: + export SCHEDULER_POD_NAME=$(kubectl get pods --namespace dragonfly-system -l "app=dragonfly,release=dragonfly,component=scheduler" -o jsonpath={.items[0].metadata.name}) + export SCHEDULER_CONTAINER_PORT=$(kubectl get pod --namespace dragonfly-system $SCHEDULER_POD_NAME -o jsonpath="{.spec.containers[0].ports[0].containerPort}") + kubectl --namespace dragonfly-system port-forward $SCHEDULER_POD_NAME 8002:$SCHEDULER_CONTAINER_PORT + echo "Visit http://127.0.0.1:8002 to use your scheduler" + +1. Get the dfdaemon port by running these commands: + export DFDAEMON_POD_NAME=$(kubectl get pods --namespace dragonfly-system -l "app=dragonfly,release=dragonfly,component=dfdaemon" -o jsonpath={.items[0].metadata.name}) + export DFDAEMON_CONTAINER_PORT=$(kubectl get pod --namespace dragonfly-system $DFDAEMON_POD_NAME -o jsonpath="{.spec.containers[0].ports[0].containerPort}") + You can use $DFDAEMON_CONTAINER_PORT as a proxy port in Node. + +1. 
Configure runtime to use dragonfly: + https://d7y.io/docs/getting-started/quick-start/kubernetes/ +``` + + + +Check that Dragonfly is deployed successfully: + +```shell +$ kubectl get po -n dragonfly-system +NAME READY STATUS RESTARTS AGE +dragonfly-client-b6vjg 1/1 Running 0 63m +dragonfly-client-mpc7w 1/1 Running 0 63m +dragonfly-manager-6598986b85-22n7k 1/1 Running 0 63m +dragonfly-manager-6598986b85-4lfvv 1/1 Running 0 63m +dragonfly-manager-6598986b85-cngzb 1/1 Running 0 63m +dragonfly-mysql-0 1/1 Running 0 63m +dragonfly-redis-master-0 1/1 Running 0 63m +dragonfly-redis-replicas-0 1/1 Running 0 63m +dragonfly-redis-replicas-1 1/1 Running 0 61m +dragonfly-redis-replicas-2 1/1 Running 0 61m +dragonfly-scheduler-0 1/1 Running 0 63m +dragonfly-scheduler-1 1/1 Running 0 58m +dragonfly-scheduler-2 1/1 Running 0 45m +dragonfly-seed-client-0 1/1 Running 0 63m +dragonfly-seed-client-1 1/1 Running 0 50m +dragonfly-seed-client-2 1/1 Running 0 47m +``` + +## Validate metrics {#step-3-validate-metrics} + +Visit grafana explore page at `localhost:8080/explore` and +query `dragonfly_manager_requests_total` to validate that Dragonfly metrics have been collected. + +![grafana-validate-metrics](../../../resource/operations/best-practices/observability/monitoring/grafana-validate-metrics.jpg) + +## Import Dragonfly grafana dashboards {#step-4-import-dragonfly-grafana-dashboards} + +Dragonfly grafana dashboard info is: + + + +| Name | ID | Link | Description | +| :-------------------- | :---- | :------------------------------------------- | :---------------------------------------------------------------- | +| Dragonfly Manager | 15945 | https://grafana.com/grafana/dashboards/15945 | Grafana dashboard for dragonfly manager. | +| Dragonfly Scheduler | 15944 | https://grafana.com/grafana/dashboards/15944 | Granafa dashboard for dragonfly scheduler. 
| +| Dragonfly Client      | 21053 | https://grafana.com/grafana/dashboards/21053 | Grafana dashboard for dragonfly client and dragonfly seed client. | +| Dragonfly Seed Client | 21054 | https://grafana.com/grafana/dashboards/21054 | Grafana dashboard for dragonfly seed client.                      | + + + +Import Dragonfly grafana dashboard using ID, IDs are `15945`, `15944`, `21053` and `21054`, refer to [export-import](https://grafana.com/docs/grafana/latest/dashboards/export-import/) + +![grafana-import-dashboard](../../../resource/operations/best-practices/observability/monitoring/grafana-import-dashboard.jpg) + +Import Dragonfly grafana dashboard successfully, you can visit the dashboard + +![grafana-manager](../../../resource/operations/best-practices/observability/monitoring/grafana-manager.jpg) + +![grafana-scheduler](../../../resource/operations/best-practices/observability/monitoring/grafana-scheduler.jpg) + +![grafana-peer](../../../resource/operations/best-practices/observability/monitoring/grafana-peer.jpg) diff --git a/versioned_docs/version-v2.2.0/operations/best-practices/security/threat-model.md b/versioned_docs/version-v2.2.0/operations/best-practices/security/threat-model.md new file mode 100644 index 00000000..c295b784 --- /dev/null +++ b/versioned_docs/version-v2.2.0/operations/best-practices/security/threat-model.md @@ -0,0 +1,197 @@ +--- +id: threat-model +title: Threat Model +slug: /operations/best-practices/security/threat-model/ +--- + +This document outlines the threat model for the Dragonfly system. The threat model outlines potential threats faced by +Dragonfly designers and operators, aiming to mitigate risks associated with bypassing, reducing efficacy, +or misusing the system. Dragonfly community analyzes system security based on the [STRIDE model](https://en.wikipedia.org/wiki/STRIDE_model). 
+ +## Threat Model + +According to analysis of Dragonfly architecture, the threat attackers are classified into the following categories: + +- **External Malicious Attackers**: These are attackers who are not part of the Dragonfly system but can interact with + the system to exploit vulnerabilities. +- **Internal Malicious Attackers**: These are attackers who are part of the Dragonfly system and have access to + the system's resources to exploit vulnerabilities. + +### Attack surface of external malicious attackers + +Considering the external malicious attackers and the architecture of the Dragonfly system, the following attack surfaces +are identified: + +![external-attackers](../../../resource/operations/best-practices/security/external-attackers.png) + +#### Threat ID 1: Attacker gets access to valid credentials for the manager + +Scenario + +An attacker gets access to valid credentials for the manager and can control the manager to perform malicious operations. + +Impact + +The attacker can preheat images or files with the access of the manager and manager will call the scheduler to preheat +the images or files in the seed peers. If a large number of files are downloaded, the seed peer will frequently request origin, +resulting in reduced origin performance. The disk space of the seed peer will be occupied by the downloaded files. + +Mitigation + +- Personal access tokens are used to authenticate the manager and RBAC is used to control the manager's permissions. +- Set expiration time for personal access tokens and rotate them regularly. +- Decrease the rate limit of the preheat operation in manager to prevent the seed peer requesting origin frequently. +- Set the rate limit of downloading files in the seed peer to prevent the origin pressure. +- GC the disk space of the seed peer regularly. + +Recommendations + +- Issue personal access tokens in manager console and set the expiration time for the tokens. 
+- Set the rate limit of the RESTful API for the manager in the configuration. +- Add the certificate to the manager to ensure the security of the communication. +- Set the rate limit of the downloading files in the seed peer in the configuration, and the value + needs to evaluate according to the actual situation. +- Set the GC policy of the disk space in the seed peer in the configuration to prevent the disk space + to be occupied fully. + +#### Threat ID 2: Attacker gets access to the GRPC server of the peer + +Scenario + +An attacker gets access to the GRPC server of the peer and can control the peer to perform malicious operations. + +Impact + +The attacker can download files from the peer and the peer will request the scheduler to schedule the download task. +If a large number of files are downloaded, the peer will frequently request the scheduler, resulting +in reduced scheduler performance. The disk space of the peer will be occupied by the downloaded files. + +Mitigation + +- The system user is used to authenticate the GRPC server with unix domain socket. +- If the scheduler receives a large number of requests from the peer, the scheduler will reject + the request by rate limiting. +- GC the disk space of the peer regularly. + +Recommendations + +- The system user is important to ensure the security of the communication between the user and the peer, + so it needs to be set by security policy. +- The GRPC server of the scheduler needs to set the rate limit of the request to prevent the scheduler + from receiving a large number of requests from the peer. +- Set the GC policy of the disk space in the peer in the configuration to prevent the disk space + to be occupied fully. + +#### Threat ID 3: Attacker gets access to the database + +Scenario + +An attacker gets access to the database and can control the database to perform malicious operations. + +Impact + +The attacker can delete the data in the database, which will cause the system to be unavailable. 
The peer and seed peer +can not find the available scheduler to schedule the download task and the scheduler can not find the available seed peer +to trigger download task back-to-origin. + +Mitigation + +- Scheduler and manager will cache the data in the memory to reduce the impact of the database being + attacked in the short time. Users can use the short time to recover the database. + +Recommendations + +- Use the password to protect the database and set the password policy. +- Use the TLS to protect the communication between the database and the manager. +- The database needs to be backed up regularly to prevent the data from being lost. + +#### Threat ID 4: Attacker uploads malicious files to the origin + +Scenario + +An attacker uploads malicious files to the origin and the seed peer will download the malicious files. + +Impact + +The seed peer will download the malicious files and the peer will download the malicious files from the seed peer. +The malicious files will be transferred to the user and the user will be attacked. + +Mitigation + +- The peer and seed peer will check the digest of the file from origin to ensure the file is not modified. +- The peer will check the digest of the file between peer and peer to ensure the file is not modified. + +Recommendations + +- The origin needs to prevent the attacker from uploading the malicious files. +- Use the TLS to protect the communication between the origin and the peer. + +### Attack surface of internal malicious attackers + +Considering the internal malicious attackers and the architecture of the Dragonfly system, the following attack surfaces +are identified: + +![internal-attackers](../../../resource/operations/best-practices/security/internal-attackers.png) + +#### Threat ID 4: Attacker hijacks communication between the manager and the scheduler + +Scenario + +An attacker hijacks the communication between the manager and the scheduler and can control the +scheduler to perform malicious operations. 
+ +Impact + +The attacker can hijack the communication between the manager and the scheduler and the scheduler will not receive the +correct dynamic config, resulting in the risk of spoofing and tampering in the communication. + +Mitigation + +- Mutual authentication is used to authenticate the manager and the scheduler, the encryption is used to prevent + the communication to avoid risk of spoofing and tampering. + +Recommendations + +- Set the self-signed certificate to the manager and the scheduler to ensure the security of the communication. +- The certificate needs to be rotated regularly to prevent the certificate from being stolen. + +#### Threat ID 5: Attacker hijacks communication between the scheduler and the peer + +Scenario + +An attacker hijacks the communication between the scheduler and the peer and can control the scheduler to +perform malicious operations. + +Impact + +The attacker can hijack the communication between the scheduler and the peer and the scheduler will not receive the correct +message, resulting in the risk of spoofing and tampering in the communication. + +Mitigation + +- Mutual authentication is used to authenticate the scheduler and the peer, the encryption is used to prevent the communication + to avoid risk of spoofing and tampering. + +Recommendations + +- Set the self-signed certificate to the scheduler and the peer to ensure the security of the communication. +- The certificate needs to be rotated regularly to prevent the certificate from being stolen. + +#### Threat ID 6: Attacker destroys the download task cache in the seed peer + +Scenario + +An attacker destroys the download task cache in the seed peer and the dirty data will be transferred to the other peers. + +Impact + +- The dirty data will be transferred to the other peers and the user will download the dirty data. +- As long as one peer has dirty data in the P2P cluster, no peers will be able to download correct data. 
+ +Mitigation + +- Verify the digest of the file based on the piece level to ensure the file is not modified. +- Clean the dirty data based on the task ID in manager console. + +Recommendations + +- Enable the piece level verification to ensure the file is not modified. diff --git a/versioned_docs/version-v2.2.0/operations/deployment/applications/client.md b/versioned_docs/version-v2.2.0/operations/deployment/applications/client.md new file mode 100644 index 00000000..187c0c89 --- /dev/null +++ b/versioned_docs/version-v2.2.0/operations/deployment/applications/client.md @@ -0,0 +1,29 @@ +--- +id: client +title: Client +slug: /operations/deployment/applications/client/ +--- + +Client is the peer client in P2P network. Use `dfdaemon` to start, or `dfget` to download and upload. + +## Features {#features} + +- Serve gRPC for `dfget` with downloading feature, + and provide adaptation to different source protocols. +- It can be used as seed peer. Turning on the Seed Peer mode can be used as + a back-to-source download peer in a P2P cluster, + which is the root peer for download in the entire cluster. +- Serve proxy for container registry mirror and any other http backend. +- Download object like via `http`, `https` and other custom protocol. +- Set disk usage, automatic GC capabilities. +- Customize the download task piece size. +- Supports RDMA for faster network transmission in the P2P network. + It can better support the loading of AI inference models into memory. +- Supports file writing and seeding, it can be accessed in the P2P cluster without uploading to other storage. + Helps AI models and AI datasets to be read and written faster in the P2P network. + +## Relationship {#relationship} + +- Client registers itself to Manager for fetching Scheduler. +- Client registers P2P tasks to Scheduler. +- Client uploads data to other Client. 
diff --git a/versioned_docs/version-v2.2.0/operations/deployment/applications/manager.md b/versioned_docs/version-v2.2.0/operations/deployment/applications/manager.md new file mode 100644 index 00000000..36703eba --- /dev/null +++ b/versioned_docs/version-v2.2.0/operations/deployment/applications/manager.md @@ -0,0 +1,36 @@ +--- +id: manager +title: Manager +slug: /operations/deployment/applications/manager/ +--- + +It plays the role of Manager in the multi-P2P cluster deployment process. +Used to manage the dynamic configuration that each module depends on, +and provide keepalive and metrics functions. + +## Features {#features} + +- Stores dynamic configuration for consumption by seed peer cluster, Scheduler cluster and Client. +- Maintain the relationship between seed peer cluster and Scheduler cluster. +- Provide async task management features for image preheat combined with harbor. +- Keepalive with Scheduler instance and seed peer instance. +- Filter the optimal Scheduler cluster for Client. +- Provides a visual console, which is helpful for users to manage the P2P cluster. +- Peer features are configurable. For example, you can make the peer can not be uploaded and can only be downloaded. +- Clear P2P task cache. +- Display P2P traffic distribution. +- Peer information display, including CPU, Memory, etc. + +## Relationship {#relationship} + +- Seed peer cluster and Scheduler cluster have a `1:1` relationship +- Seed peer cluster and Seed peer instance have a `1:N` relationship +- Scheduler cluster and Scheduler instance have a `1:N` relationship + +## Manage multiple P2P networks {#manage-multiple-p2p-networks} + +Manager can manage multiple P2P networks. +Usually, a P2P network includes a Scheduler cluster, a seed peer cluster and many dfdaemons. +The service network must be available in a P2P network. 
+ +![manage-multiple-p2p-networks](../../../resource/architecture/manage-multiple-p2p-networks.png) diff --git a/versioned_docs/version-v2.2.0/operations/deployment/applications/scheduler.md b/versioned_docs/version-v2.2.0/operations/deployment/applications/scheduler.md new file mode 100644 index 00000000..e53364fd --- /dev/null +++ b/versioned_docs/version-v2.2.0/operations/deployment/applications/scheduler.md @@ -0,0 +1,41 @@ +--- +id: scheduler +title: Scheduler +slug: /operations/deployment/applications/scheduler/ +--- + +Scheduler selects the optimal parent peer for current peer to be downloaded +and triggers the seed peer back-to-source download or Client back-to-source download at the appropriate time. + +## Features {#features} + +- Based on the multi-feature intelligent scheduling system selects the optimal parent peer. +- Build a scheduling directed acyclic graph for the P2P cluster. +- Remove abnormal peer based on peer multi-feature evaluation results. +- In the case of scheduling failure, notice peer back-to-source download. +- Provide metadata storage to support file writing and seeding. + +## Scheduling {#scheduling} + +Scheduler maintains task, peer and host resources. + +- Peer: a download task for Client +- Host: host information for Client, host and peer have a `1:N` relationship +- Task: a download task, task and peer have a `1:N` relationship + +The scheduling process is actually to build a directed acyclic graph according to the peer's load. + +![scheduler-dag](../../../resource/architecture/scheduler-dag.png) + +## Peer State Machine {#peer-state-machine} + +The Scheduler divides tasks into three types `Tiny`, `Small` and `Normal`. + +- Tiny: file size is less than 128 bytes +- Small: only one piece task +- Normal: tasks with more than one piece + +Different scheduling strategies are used for different types of download tasks, +following state transition diagram during the peer scheduling process. 
+ +![scheduler-state-machine](../../../resource/architecture/scheduler-state-machine.jpg) diff --git a/versioned_docs/version-v2.2.0/operations/deployment/architecture.md b/versioned_docs/version-v2.2.0/operations/deployment/architecture.md new file mode 100644 index 00000000..c9bf60e9 --- /dev/null +++ b/versioned_docs/version-v2.2.0/operations/deployment/architecture.md @@ -0,0 +1,68 @@ +--- +id: architecture +title: Architecture +slug: /operations/deployment/architecture/ +--- + +## Positioning {#positioning} + +Provide efficient, stable, secure, low-cost file and +image distribution services to be the best practice and +standard solution in cloud native architectures. + +## Features {#features} + +- Based on the multi-feature intelligent scheduling system, it not only improves the + download efficiency but also ensures the system stability. +- By adapting to support different source protocols (HDFS, + storage services of various cloud vendors, Maven, YUM, etc.). +- Support more distribution modes, such as active pull, active push, + sync, preheat, etc. +- Separation between systems, support separate deployment to + meet the needs of different scenarios +- Based on the newly designed P2P protocol framework of grpc, + with better efficiency and stability. +- Customized P2P protocol based on GRPC is efficient and stable. +- Support user RBAC and multi-tenant isolation. +- Improve distribution efficiency by dynamically compressing files during distribution. +- The client supports third-party client integration + of Dragonfly's P2P capabilities through the C/S mode. +- Support features such as task management, data visualization, and control of multiple P2P clusters. +- Integration with cloud native ecosystem, such as Harbor, Nydus, etc. +- Support AI infrastructure to efficiently distribute models and datasets, and integrated with the AI ecosystem. 
+ +## Architecture {#architecture} + +![arch](../../resource/concepts/arch.png) + +## Subsystem features {#subsystem-features} + +### Manager {#manager} + +- Stores dynamic configuration for consumption by seed peer cluster, scheduler cluster and client. +- Maintain the relationship between seed peer cluster and scheduler cluster. +- Provide async task management features for image preheat combined with harbor. +- Keepalive with scheduler instance and seed peer instance. +- Filter the optimal scheduler cluster for client. +- Provides a visual console, which is helpful for users to manage the P2P cluster. +- Clearing P2P task cache. + +### Scheduler {#scheduler} + +- Based on the multi-feature intelligent scheduling system selects the optimal parent peer. +- Build a scheduling directed acyclic graph for the P2P cluster. +- Remove abnormal peer based on peer multi-feature evaluation results. +- In the case of scheduling failure, notice peer back-to-source download. +- Provide metadata storage to support file writing and seeding. + +### Client {#client} + +- Serve gRPC for `dfget` with downloading feature, + and provide adaptation to different source protocols. +- It can be used as seed peer. Turning on the Seed Peer mode can be used as + a back-to-source download peer in a P2P cluster, + which is the root peer for download in the entire cluster. +- Serve proxy for container registry mirror and any other http backend. +- Download object like via `http`, `https` and other custom protocol. +- Supports RDMA for faster network transmission in the P2P network. + It can better support the loading of AI inference models into memory. 
diff --git a/versioned_docs/version-v2.2.0/operations/integrations/container-runtime/containerd.md b/versioned_docs/version-v2.2.0/operations/integrations/container-runtime/containerd.md new file mode 100644 index 00000000..9307da3a --- /dev/null +++ b/versioned_docs/version-v2.2.0/operations/integrations/container-runtime/containerd.md @@ -0,0 +1,705 @@ +--- +id: containerd +title: containerd +slug: /operations/integrations/container-runtime/containerd/ +--- + +Documentation for setting Dragonfly's container runtime to containerd. + +## Prerequisites {#prerequisites} + +| Name | Version | Document | +| ------------------ | ------- | --------------------------------------- | +| Kubernetes cluster | 1.20+ | [kubernetes.io](https://kubernetes.io/) | +| Helm | v3.8.0+ | [helm.sh](https://helm.sh/) | +| containerd | v1.5.0+ | [containerd.io](https://containerd.io/) | + +## Quick Start {#quick-start} + +### Setup kubernetes cluster {#setup-kubernetes-cluster} + +[Kind](https://kind.sigs.k8s.io/) is recommended if no Kubernetes cluster is available for testing. 
+ +Create kind multi-node cluster configuration file `kind-config.yaml`, configuration content is as follows: + +```yaml +kind: Cluster +apiVersion: kind.x-k8s.io/v1alpha4 +nodes: + - role: control-plane + - role: worker + - role: worker +``` + +Create a kind multi-node cluster using the configuration file: + +```shell +kind create cluster --config kind-config.yaml +``` + +Switch the context of kubectl to kind cluster: + +```shell +kubectl config use-context kind-kind +``` + +### Kind loads Dragonfly image {#kind-loads-dragonfly-image} + +Pull Dragonfly latest images: + +```shell +docker pull dragonflyoss/scheduler:latest +docker pull dragonflyoss/manager:latest +docker pull dragonflyoss/client:latest +docker pull dragonflyoss/dfinit:latest +``` + +Kind cluster loads Dragonfly latest images: + +```shell +kind load docker-image dragonflyoss/scheduler:latest +kind load docker-image dragonflyoss/manager:latest +kind load docker-image dragonflyoss/client:latest +kind load docker-image dragonflyoss/dfinit:latest +``` + +### Create Dragonfly cluster based on helm charts {#create-dragonfly-cluster-based-on-helm-charts} + +Create the Helm Charts configuration file `values.yaml`. Please refer to the +[configuration](https://artifacthub.io/packages/helm/dragonfly/dragonfly#values) documentation for details. 
+ +```yaml +manager: + image: + repository: dragonflyoss/manager + tag: latest + metrics: + enable: true + config: + verbose: true + pprofPort: 18066 + +scheduler: + image: + repository: dragonflyoss/scheduler + tag: latest + metrics: + enable: true + config: + verbose: true + pprofPort: 18066 + +seedClient: + image: + repository: dragonflyoss/client + tag: latest + metrics: + enable: true + config: + verbose: true + +client: + image: + repository: dragonflyoss/client + tag: latest + metrics: + enable: true + config: + verbose: true + dfinit: + enable: true + image: + repository: dragonflyoss/dfinit + tag: latest + config: + containerRuntime: + containerd: + configPath: /etc/containerd/config.toml + registries: + - hostNamespace: docker.io + serverAddr: https://index.docker.io + capabilities: ['pull', 'resolve'] +``` + +Create a Dragonfly cluster using the configuration file: + + + +```shell +$ helm repo add dragonfly https://dragonflyoss.github.io/helm-charts/ +$ helm install --wait --create-namespace --namespace dragonfly-system dragonfly dragonfly/dragonfly -f values.yaml +NAME: dragonfly +LAST DEPLOYED: Mon Apr 28 10:59:19 2024 +NAMESPACE: dragonfly-system +STATUS: deployed +REVISION: 1 +TEST SUITE: None +NOTES: +1. Get the scheduler address by running these commands: + export SCHEDULER_POD_NAME=$(kubectl get pods --namespace dragonfly-system -l "app=dragonfly,release=dragonfly,component=scheduler" -o jsonpath={.items[0].metadata.name}) + export SCHEDULER_CONTAINER_PORT=$(kubectl get pod --namespace dragonfly-system $SCHEDULER_POD_NAME -o jsonpath="{.spec.containers[0].ports[0].containerPort}") + kubectl --namespace dragonfly-system port-forward $SCHEDULER_POD_NAME 8002:$SCHEDULER_CONTAINER_PORT + echo "Visit http://127.0.0.1:8002 to use your scheduler" + +2. 
Get the dfdaemon port by running these commands: + export DFDAEMON_POD_NAME=$(kubectl get pods --namespace dragonfly-system -l "app=dragonfly,release=dragonfly,component=dfdaemon" -o jsonpath={.items[0].metadata.name}) + export DFDAEMON_CONTAINER_PORT=$(kubectl get pod --namespace dragonfly-system $DFDAEMON_POD_NAME -o jsonpath="{.spec.containers[0].ports[0].containerPort}") + You can use $DFDAEMON_CONTAINER_PORT as a proxy port in Node. + +3. Configure runtime to use dragonfly: + https://d7y.io/docs/getting-started/quick-start/kubernetes/ +``` + + + +Check that Dragonfly is deployed successfully: + +```shell +$ kubectl get po -n dragonfly-system +NAME READY STATUS RESTARTS AGE +dragonfly-client-54vm5 1/1 Running 0 37m +dragonfly-client-cvbln 1/1 Running 0 37m +dragonfly-manager-864774f54d-njdhx 1/1 Running 0 37m +dragonfly-mysql-0 1/1 Running 0 37m +dragonfly-redis-master-0 1/1 Running 0 37m +dragonfly-redis-replicas-0 1/1 Running 0 37m +dragonfly-redis-replicas-1 1/1 Running 0 5m10s +dragonfly-redis-replicas-2 1/1 Running 0 4m44s +dragonfly-scheduler-0 1/1 Running 0 37m +dragonfly-seed-client-0 1/1 Running 2 (27m ago) 37m +``` + +### Containerd downloads images through Dragonfly {#containerd-downloads-images-through-dragonfly} + +Pull `alpine:3.19` image in kind-worker node: + +```shell +docker exec -i kind-worker /usr/local/bin/crictl pull alpine:3.19 +``` + +#### Verify {#verify} + +You can execute the following command to check if the `alpine:3.19` image is distributed via Dragonfly. + + + +```shell +# Find pod name. +export POD_NAME=$(kubectl get pods --namespace dragonfly-system -l "app=dragonfly,release=dragonfly,component=client" -o=jsonpath='{.items[?(@.spec.nodeName=="kind-worker")].metadata.name}' | head -n 1 ) + +# Find task id. +export TASK_ID=$(kubectl -n dragonfly-system exec ${POD_NAME} -- sh -c "grep -hoP 'library/alpine.*task_id=\"\K[^\"]+' /var/log/dragonfly/dfdaemon/* | head -n 1") + +# Check logs. 
+kubectl -n dragonfly-system exec -it ${POD_NAME} -- sh -c "grep ${TASK_ID} /var/log/dragonfly/dfdaemon/* | grep 'download task succeeded'" +``` + + + +The expected output is as follows: + +```shell +{ + 2024-04-19T02:44:09.259458Z INFO + "download_task":"dragonfly-client/src/grpc/dfdaemon_download.rs:276":: "download task succeeded" + "host_id": "172.18.0.3-kind-worker", + "task_id": "a46de92fcb9430049cf9e61e267e1c3c9db1f1aa4a8680a048949b06adb625a5", + "peer_id": "172.18.0.3-kind-worker-86e48d67-1653-4571-bf01-7e0c9a0a119d" +} +``` + +## More configurations {#more-configurations} + +### Multiple Registries {#multiple-registries} + +**Method 1**: Deploy using Helm Charts and create the Helm Charts configuration file `values.yaml`. +Please refer to the [configuration](https://artifacthub.io/packages/helm/dragonfly/dragonfly#values) documentation for details. + +```yaml +manager: + image: + repository: dragonflyoss/manager + tag: latest + metrics: + enable: true + config: + verbose: true + pprofPort: 18066 + +scheduler: + image: + repository: dragonflyoss/scheduler + tag: latest + metrics: + enable: true + config: + verbose: true + pprofPort: 18066 + +seedClient: + image: + repository: dragonflyoss/client + tag: latest + metrics: + enable: true + config: + verbose: true + +client: + image: + repository: dragonflyoss/client + tag: latest + metrics: + enable: true + config: + verbose: true + dfinit: + enable: true + image: + repository: dragonflyoss/dfinit + tag: latest + config: + containerRuntime: + containerd: + configPath: /etc/containerd/config.toml + registries: + - hostNamespace: docker.io + serverAddr: https://index.docker.io + capabilities: ['pull', 'resolve'] + - hostNamespace: ghcr.io + serverAddr: https://ghcr.io + capabilities: ['pull', 'resolve'] +``` + +**Method 2**: Modify your `config.toml` (default location: `/etc/containerd/config.toml`), refer to 
[registry-configuration-examples](https://github.com/containerd/containerd/blob/main/docs/hosts.md#registry-configuration---examples). + +> Notice: config_path is the path where containerd looks for registry configuration files. + +```toml +# explicitly use v2 config format +version = 2 + +[plugins."io.containerd.grpc.v1.cri".registry] + config_path = "/etc/containerd/certs.d" +``` + +Create the registry configuration file `/etc/containerd/certs.d/docker.io/hosts.toml`: + +> Notice: The container registry is `https://index.docker.io`. + +```toml +server = "https://index.docker.io" + +[host."http://127.0.0.1:4001"] +capabilities = ["pull", "resolve"] + +[host."http://127.0.0.1:4001".header] +X-Dragonfly-Registry = "https://index.docker.io" +``` + +Create the registry configuration file `/etc/containerd/certs.d/ghcr.io/hosts.toml`: + +> Notice: The container registry is `https://ghcr.io`. + +```toml +server = "https://ghcr.io" + +[host."http://127.0.0.1:4001"] +capabilities = ["pull", "resolve"] + +[host."http://127.0.0.1:4001".header] +X-Dragonfly-Registry = "https://ghcr.io" +``` + +Restart containerd: + +```shell +systemctl restart containerd +``` + +### Private project {#private-project} + +Deploy using Helm Charts and create the Helm Charts configuration file `values.yaml`. +Please refer to the [configuration](https://artifacthub.io/packages/helm/dragonfly/dragonfly#values) documentation for details. 
+ +```yaml +manager: + image: + repository: dragonflyoss/manager + tag: latest + metrics: + enable: true + config: + verbose: true + pprofPort: 18066 + +scheduler: + image: + repository: dragonflyoss/scheduler + tag: latest + metrics: + enable: true + config: + verbose: true + pprofPort: 18066 + +seedClient: + image: + repository: dragonflyoss/client + tag: latest + metrics: + enable: true + config: + verbose: true + +client: + image: + repository: dragonflyoss/client + tag: latest + metrics: + enable: true + config: + verbose: true + dfinit: + enable: true + image: + repository: dragonflyoss/dfinit + tag: latest + config: + containerRuntime: + containerd: + configPath: /etc/containerd/config.toml + registries: + - hostNamespace: your_private_registry_host_addr + serverAddr: your_private_registry_server_addr + capabilities: ['pull', 'resolve'] +``` + +Modify your `config.toml` (default location: `/etc/containerd/config.toml`), refer to [configure-registry-credentials](https://github.com/containerd/containerd/blob/v1.5.2/docs/cri/registry.md#configure-registry-credentials). + +> Notice:`your_private_registry_host_addr` is your private registry host address. + +```toml +[plugins."io.containerd.grpc.v1.cri".registry.configs."your_private_registry_host_addr".auth] + username = "your_private_registry_username" + password = "your_private_registry_password" + auth = "your_private_registry_token" +[plugins."io.containerd.grpc.v1.cri".registry.configs."127.0.0.1:4001".auth] + username = "your_private_registry_username" + password = "your_private_registry_password" + auth = "your_private_registry_token" +``` + +Restart containerd: + +```shell +systemctl restart containerd +``` + +### Container Registry using self-signed certificates + +Use Harbor as an example of a container registry using self-signed certificates. +Harbor generates self-signed certificate, refer to [Harbor](https://goharbor.io/docs/2.11.0/install-config/configure-https/). 
+ +#### Install Dragonfly with Helm Charts + +##### Create self-signed certificate secret for Seed Peer + +Create seed client secret configuration file `seed-client-secret.yaml`, configuration content is as follows: + +> Notice: yourdomain.crt is Harbor's ca.crt. + +```yaml +apiVersion: v1 +kind: Secret +metadata: + name: seed-client-secret + namespace: dragonfly-system +type: Opaque +data: + # the data is abbreviated in this example. + yourdomain.crt: | + MIIFwTCCA6mgAwIBAgIUdgmYyNCw4t+Lp/... +``` + +Create the secret through the following command: + +```shell +kubectl apply -f seed-client-secret.yaml +``` + +##### Create self-signed certificate secret for Peer + +Create client secret configuration file `client-secret.yaml`, configuration content is as follows: + +> Notice: yourdomain.crt is Harbor's ca.crt. + +```yaml +apiVersion: v1 +kind: Secret +metadata: + name: client-secret + namespace: dragonfly-system +type: Opaque +data: + # the data is abbreviated in this example. + yourdomain.crt: | + MIIFwTCCA6mgAwIBAgIUdgmYyNCw4t+Lp/... +``` + +Create the secret through the following command: + +```shell +kubectl apply -f client-secret.yaml +``` + +##### Create Dragonfly cluster based on helm charts {#harbor-create-dragonfly-cluster-based-on-helm-charts} + +Create helm charts configuration file `values.yaml`, configuration content is as follows: + +- Support preheating for harbor with self-signed certificates, + you need to change the `manager.config.job.preheat.tls` configuration, + `/etc/certs/yourdomain.crt` is the harbor self-signed certificate configuration file. + If you want to bypass TLS verification, please set `insecureSkipVerify` to `true`. 
+ +- Support dragonfly as registry of containerd for harbor with self-signed certificates, + you need to change the `client.config.proxy.registryMirror` configuration and + `seedClient.config.proxy.registryMirror` configuration, + `https://yourdomain.com` is the harbor service address, + `/etc/certs/yourdomain.crt` is the harbor self-signed certificate configuration file. + +- Set the configuration of the containerd for harbor with self-signed certificates, + you need to change the `client.dfinit.config.containerRuntime.containerd.registries` configuration, + `yourdomain.com` is harbor registry host address, `https://yourdomain.com` is the Harbor service address. + If you want to bypass TLS verification, please set `skipVerify` to `true`. + +```yaml +manager: + image: + repository: dragonflyoss/manager + tag: latest + metrics: + enable: true + config: + verbose: true + pprofPort: 18066 + job: + preheat: + tls: + insecureSkipVerify: false + caCert: /etc/certs/yourdomain.crt + extraVolumes: + - name: client-secret + secret: + secretName: client-secret + extraVolumeMounts: + - name: client-secret + mountPath: /etc/certs + +scheduler: + image: + repository: dragonflyoss/scheduler + tag: latest + metrics: + enable: true + config: + verbose: true + pprofPort: 18066 + +seedClient: + image: + repository: dragonflyoss/client + tag: latest + metrics: + enable: true + config: + verbose: true + proxy: + registryMirror: + addr: https://yourdomain.com + cert: /etc/certs/yourdomain.crt + extraVolumes: + - name: seed-client-secret + secret: + secretName: seed-client-secret + extraVolumeMounts: + - name: seed-client-secret + mountPath: /etc/certs + +client: + image: + repository: dragonflyoss/client + tag: latest + metrics: + enable: true + config: + verbose: true + proxy: + registryMirror: + addr: https://yourdomain.com + cert: /etc/certs/yourdomain.crt + extraVolumes: + - name: client-secret + secret: + secretName: client-secret + extraVolumeMounts: + - name: client-secret + 
mountPath: /etc/certs + dfinit: + enable: true + image: + repository: dragonflyoss/dfinit + tag: latest + config: + containerRuntime: + containerd: + configPath: /etc/containerd/config.toml + registries: + - hostNamespace: yourdomain.com + serverAddr: https://yourdomain.com + capabilities: ['pull', 'resolve'] + skipVerify: true +``` + +#### Install Dragonfly with Binaries + +Copy Harbor's ca.crt file to `/etc/certs/yourdomain.crt`. + +```shell +cp ca.crt /etc/certs/yourdomain.crt +``` + +Install Dragonfly with Binaries, refer to [Binaries](../../../getting-started/installation/binaries.md). + +##### Setup Manager and configure self-signed certificate + +To support preheating for harbor with self-signed certificates, the Manager configuration needs to be modified. + +Configure `manager.yaml`, the default path is `/etc/dragonfly/manager.yaml`, +refer to [manager config](../../../reference/configuration/manager.md). + +> Notice: `yourdomain.crt` is Harbor's ca.crt. + +```shell +job: + # Preheat configuration. + preheat: + tls: + # insecureSkipVerify controls whether a client verifies the server's certificate chain and hostname. + insecureSkipVerify: false + # # caCert is the CA certificate for preheat tls handshake, it can be path or PEM format string. + caCert: /etc/certs/yourdomain.crt +``` + +Skip TLS verification, set `job.preheat.tls.insecureSkipVerify` to true. + +```shell +job: + # Preheat configuration. + preheat: + tls: + # insecureSkipVerify controls whether a client verifies the server's certificate chain and hostname. + insecureSkipVerify: true + # # caCert is the CA certificate for preheat tls handshake, it can be path or PEM format string. + # caCert: '' +``` + +##### Setup Dfdaemon as Seed Peer and configure self-signed certificate + +Configure `dfdaemon.yaml`, the default path is `/etc/dragonfly/dfdaemon.yaml`, +refer to [dfdaemon config](../../../reference/configuration/client/dfdaemon.md). 
+ +```shell +manager: + addr: http://dragonfly-manager:65003 +seedPeer: + enable: true + type: super + clusterID: 1 +proxy: + registryMirror: + # addr is the default address of the registry mirror. Proxy will start a registry mirror service for the + # client to pull the image. The client can use the default address of the registry mirror in + # configuration to pull the image. The `X-Dragonfly-Registry` header can instead of the default address + # of registry mirror. + addr: https://yourdomain.com + ## cert is the client cert path with PEM format for the registry. + ## If registry use self-signed cert, the client should set the + ## cert for the registry mirror. + cert: /etc/certs/yourdomain.crt +``` + +##### Setup Dfdaemon as Peer and configure self-signed certificate + +Configure `dfdaemon.yaml`, the default path is `/etc/dragonfly/dfdaemon.yaml`, +refer to [dfdaemon config](../../../reference/configuration/client/dfdaemon.md). + +```shell +manager: + addr: http://dragonfly-manager:65003 +proxy: + registryMirror: + # addr is the default address of the registry mirror. Proxy will start a registry mirror service for the + # client to pull the image. The client can use the default address of the registry mirror in + # configuration to pull the image. The `X-Dragonfly-Registry` header can instead of the default address + # of registry mirror. + addr: https://yourdomain.com + ## cert is the client cert path with PEM format for the registry. + ## If registry use self-signed cert, the client should set the + ## cert for the registry mirror. + cert: /etc/certs/yourdomain.crt +``` + +##### Configure containerd self-signed certificate + +Modify your `config.toml` (default location: `/etc/containerd/config.toml`), refer to [registry-configuration-examples](https://github.com/containerd/containerd/blob/main/docs/hosts.md#registry-configuration---examples). + +> Notice: config_path is the path where containerd looks for registry configuration files. 
+ +```toml +# explicitly use v2 config format +version = 2 + +[plugins."io.containerd.grpc.v1.cri".registry] + config_path = "/etc/containerd/certs.d" +``` + +Create the registry configuration file `/etc/containerd/certs.d/yourdomain.com/hosts.toml`: + +> Notice: `https://yourdomain.com` is the Harbor service address. + +```toml +server = "https://yourdomain.com" + +[host."http://127.0.0.1:4001"] +capabilities = ["pull", "resolve"] +ca = "/etc/certs/yourdomain.crt" + +[host."http://127.0.0.1:4001".header] +X-Dragonfly-Registry = "https://yourdomain.com" +``` + +To bypass the TLS verification for a private registry at `yourdomain.com`. + +```toml +server = "https://yourdomain.com" + +[host."http://127.0.0.1:4001"] +capabilities = ["pull", "resolve"] +skip_verify = true + +[host."http://127.0.0.1:4001".header] +X-Dragonfly-Registry = "https://yourdomain.com" +``` + +Restart containerd: + +```shell +systemctl restart containerd +``` + +#### containerd downloads harbor images through Dragonfly + +```shell +crictl pull yourdomain.com/alpine:3.19 +``` diff --git a/versioned_docs/version-v2.2.0/operations/integrations/container-runtime/cri-o.md b/versioned_docs/version-v2.2.0/operations/integrations/container-runtime/cri-o.md new file mode 100644 index 00000000..a9304ca0 --- /dev/null +++ b/versioned_docs/version-v2.2.0/operations/integrations/container-runtime/cri-o.md @@ -0,0 +1,513 @@ +--- +id: cri-o +title: CRI-O +slug: /operations/integrations/container-runtime/cri-o/ +--- + +Documentation for setting Dragonfly's container runtime to CRI-O. 
+ +## Prerequisites {#prerequisites} + +| Name | Version | Document | +| ------------------ | ------- | --------------------------------------- | +| Kubernetes cluster | 1.20+ | [kubernetes.io](https://kubernetes.io/) | +| Helm | v3.8.0+ | [helm.sh](https://helm.sh/) | +| CRI-O | v1.5.0+ | [cri-o.io](https://cri-o.io/) | + +## Quick Start {#quick-start} + +### Setup kubernetes cluster {#setup-kubernetes-cluster} + +[Minikube](https://minikube.sigs.k8s.io/docs) is recommended if no Kubernetes cluster is available for testing. + +Create a Minikube cluster. + +```shell +minikube start --container-runtime=cri-o +``` + +Switch the context of kubectl to minikube cluster: + +```shell +kubectl config use-context minikube +``` + +### Minikube loads Dragonfly image {#minikube-loads-dragonfly-image} + +Pull Dragonfly latest images: + +```shell +docker pull dragonflyoss/scheduler:latest +docker pull dragonflyoss/manager:latest +docker pull dragonflyoss/client:latest +docker pull dragonflyoss/dfinit:latest +``` + +Minikube cluster loads Dragonfly latest images: + +```shell +minikube image load dragonflyoss/scheduler:latest +minikube image load dragonflyoss/manager:latest +minikube image load dragonflyoss/client:latest +minikube image load dragonflyoss/dfinit:latest +``` + +### Create Dragonfly cluster based on helm charts {#create-dragonfly-cluster-based-on-helm-charts} + +Create the Helm Charts configuration file `values.yaml`. Please refer to the +[configuration](https://artifacthub.io/packages/helm/dragonfly/dragonfly#values) documentation for details. 
+ +```yaml +manager: + image: + repository: dragonflyoss/manager + tag: latest + metrics: + enable: true + config: + verbose: true + pprofPort: 18066 + +scheduler: + image: + repository: dragonflyoss/scheduler + tag: latest + metrics: + enable: true + config: + verbose: true + pprofPort: 18066 + +seedClient: + image: + repository: dragonflyoss/client + tag: latest + metrics: + enable: true + config: + verbose: true + +client: + image: + repository: dragonflyoss/client + tag: latest + metrics: + enable: true + config: + verbose: true + dfinit: + enable: true + image: + repository: dragonflyoss/dfinit + tag: latest + config: + containerRuntime: + containerd: null + crio: + configPath: /etc/containers/registries.conf + unqualifiedSearchRegistries: ['registry.fedoraproject.org', 'registry.access.redhat.com', 'docker.io'] + registries: + - prefix: docker.io + location: docker.io +``` + +Create a Dragonfly cluster using the configuration file: + + + +```shell +$ helm repo add dragonfly https://dragonflyoss.github.io/helm-charts/ +$ helm install --create-namespace --namespace dragonfly-system dragonfly dragonfly/dragonfly -f values.yaml +NAME: dragonfly +LAST DEPLOYED: Mon Apr 28 10:59:19 2024 +NAMESPACE: dragonfly-system +STATUS: deployed +REVISION: 1 +TEST SUITE: None +NOTES: +1. Get the scheduler address by running these commands: + export SCHEDULER_POD_NAME=$(kubectl get pods --namespace dragonfly-system -l "app=dragonfly,release=dragonfly,component=scheduler" -o jsonpath={.items[0].metadata.name}) + export SCHEDULER_CONTAINER_PORT=$(kubectl get pod --namespace dragonfly-system $SCHEDULER_POD_NAME -o jsonpath="{.spec.containers[0].ports[0].containerPort}") + kubectl --namespace dragonfly-system port-forward $SCHEDULER_POD_NAME 8002:$SCHEDULER_CONTAINER_PORT + echo "Visit http://127.0.0.1:8002 to use your scheduler" + +2. 
Get the dfdaemon port by running these commands: + export DFDAEMON_POD_NAME=$(kubectl get pods --namespace dragonfly-system -l "app=dragonfly,release=dragonfly,component=dfdaemon" -o jsonpath={.items[0].metadata.name}) + export DFDAEMON_CONTAINER_PORT=$(kubectl get pod --namespace dragonfly-system $DFDAEMON_POD_NAME -o jsonpath="{.spec.containers[0].ports[0].containerPort}") + You can use $DFDAEMON_CONTAINER_PORT as a proxy port in Node. + +3. Configure runtime to use dragonfly: + https://d7y.io/docs/getting-started/quick-start/kubernetes/ +``` + + + +Check that Dragonfly is deployed successfully: + +```shell +$ kubectl get po -n dragonfly-system +NAME READY STATUS RESTARTS AGE +dragonfly-client-54vm5 1/1 Running 0 37m +dragonfly-client-cvbln 1/1 Running 0 37m +dragonfly-manager-864774f54d-njdhx 1/1 Running 0 37m +dragonfly-mysql-0 1/1 Running 0 37m +dragonfly-redis-master-0 1/1 Running 0 37m +dragonfly-redis-replicas-0 1/1 Running 0 37m +dragonfly-redis-replicas-1 1/1 Running 0 5m10s +dragonfly-redis-replicas-2 1/1 Running 0 4m44s +dragonfly-scheduler-0 1/1 Running 0 37m +dragonfly-seed-client-0 1/1 Running 2 (27m ago) 37m +``` + +### CRI-O downloads images through Dragonfly {#crio-downloads-images-through-dragonfly} + +Pull `alpine:3.19` image in minikube node: + +```shell +docker exec -i minikube /usr/bin/crictl pull alpine:3.19 +``` + +#### Verify {#verify} + +You can execute the following command to check if the `alpine:3.19` image is distributed via Dragonfly. + + + +```shell +# Find pod name. +export POD_NAME=$(kubectl get pods --namespace dragonfly-system -l "app=dragonfly,release=dragonfly,component=client" -o=jsonpath='{.items[?(@.spec.nodeName=="minikube")].metadata.name}' | head -n 1 ) + +# Find task id. +export TASK_ID=$(kubectl -n dragonfly-system exec ${POD_NAME} -- sh -c "grep -hoP 'library/alpine.*task_id=\"\K[^\"]+' /var/log/dragonfly/dfdaemon/* | head -n 1") + +# Check logs. 
+kubectl -n dragonfly-system exec -it ${POD_NAME} -- sh -c "grep ${TASK_ID} /var/log/dragonfly/dfdaemon/* | grep 'download task succeeded'" +``` + + + +The expected output is as follows: + +```shell +{ + 2024-04-19T02:44:09.259458Z INFO + "download_task":"dragonfly-client/src/grpc/dfdaemon_download.rs:276":: "download task succeeded" + "host_id": "172.18.0.3-minikube", + "task_id": "a46de92fcb9430049cf9e61e267e1c3c9db1f1aa4a8680a048949b06adb625a5", + "peer_id": "172.18.0.3-minikube-86e48d67-1653-4571-bf01-7e0c9a0a119d" +} +``` + +## More configurations + +### Container Registry using self-signed certificates + +Use Harbor as an example of a container registry using self-signed certificates. +Harbor generates self-signed certificate, refer to [Harbor](https://goharbor.io/docs/2.11.0/install-config/configure-https/). + +#### Install Dragonfly with Helm Charts + +##### Create self-signed certificate secret for Seed Peer + +Create seed client secret configuration file `seed-client-secret.yaml`, configuration content is as follows: + +> Notice: yourdomain.crt is Harbor's ca.crt. + +```yaml +apiVersion: v1 +kind: Secret +metadata: + name: seed-client-secret + namespace: dragonfly-system +type: Opaque +data: + # the data is abbreviated in this example. + yourdomain.crt: | + MIIFwTCCA6mgAwIBAgIUdgmYyNCw4t+Lp/... +``` + +Create the secret through the following command: + +```shell +kubectl apply -f seed-client-secret.yaml +``` + +##### Create self-signed certificate secret for Peer + +Create client secret configuration file `client-secret.yaml`, configuration content is as follows: + +> Notice: yourdomain.crt is Harbor's ca.crt. + +```yaml +apiVersion: v1 +kind: Secret +metadata: + name: client-secret + namespace: dragonfly-system +type: Opaque +data: + # the data is abbreviated in this example. + yourdomain.crt: | + MIIFwTCCA6mgAwIBAgIUdgmYyNCw4t+Lp/... 
+``` + +Create the secret through the following command: + +```shell +kubectl apply -f client-secret.yaml +``` + +##### Create Dragonfly cluster based on helm charts {#harbor-create-dragonfly-cluster-based-on-helm-charts} + +Create helm charts configuration file `values.yaml`, configuration content is as follows: + +- Support preheating for harbor with self-signed certificates, + you need to change the `manager.config.job.preheat.tls` configuration, + `/etc/certs/yourdomain.crt` is the harbor self-signed certificate configuration file. + If you want to bypass TLS verification, please set `insecureSkipVerify` to `true`. + +- Support dragonfly as registry of CRI-O for harbor with self-signed certificates, + you need to change the `client.config.proxy.registryMirror` configuration and + `seedClient.config.proxy.registryMirror` configuration, + `https://yourdomain.com` is the harbor service address, + `/etc/certs/yourdomain.crt` is the harbor self-signed certificate configuration file. + +- Set the configuration of CRI-O for harbor with self-signed certificates, + you need to change the `client.dfinit.config.containerRuntime.crio.registries` configuration, + `yourdomain.com` is the harbor registry host address. CRI-O skips TLS verification by default (no certificate required). 
+ +```yaml +manager: + image: + repository: dragonflyoss/manager + tag: latest + metrics: + enable: true + config: + verbose: true + pprofPort: 18066 + job: + preheat: + tls: + insecureSkipVerify: false + caCert: /etc/certs/yourdomain.crt + extraVolumes: + - name: client-secret + secret: + secretName: client-secret + extraVolumeMounts: + - name: client-secret + mountPath: /etc/certs + +scheduler: + image: + repository: dragonflyoss/scheduler + tag: latest + metrics: + enable: true + config: + verbose: true + pprofPort: 18066 + +seedClient: + image: + repository: dragonflyoss/client + tag: latest + metrics: + enable: true + config: + verbose: true + proxy: + registryMirror: + addr: https://yourdomain.com + cert: /etc/certs/yourdomain.crt + extraVolumes: + - name: seed-client-secret + secret: + secretName: seed-client-secret + extraVolumeMounts: + - name: seed-client-secret + mountPath: /etc/certs + +client: + image: + repository: dragonflyoss/client + tag: latest + metrics: + enable: true + config: + verbose: true + proxy: + registryMirror: + addr: https://yourdomain.com + cert: /etc/certs/yourdomain.crt + extraVolumes: + - name: client-secret + secret: + secretName: client-secret + extraVolumeMounts: + - name: client-secret + mountPath: /etc/certs + dfinit: + enable: true + image: + repository: dragonflyoss/dfinit + tag: latest + config: + containerRuntime: + containerd: null + crio: + configPath: /etc/containers/registries.conf + unqualifiedSearchRegistries: ['registry.fedoraproject.org', 'registry.access.redhat.com', 'docker.io'] + registries: + - prefix: yourdomain.com + location: yourdomain.com +``` + +#### Install Dragonfly with Binaries + +Copy Harbor's ca.crt file to `/etc/containers/certs.d/yourdomain.crt`. + +```shell +cp ca.crt /etc/containers/certs.d/yourdomain.crt +``` + +Install Dragonfly with Binaries, refer to [Binaries](../../../getting-started/installation/binaries.md). 
+ +##### Setup Manager and configure self-signed certificate + +To support preheating for harbor with self-signed certificates, the Manager configuration needs to be modified. + +Configure `manager.yaml`, the default path is `/etc/dragonfly/manager.yaml`, +refer to [manager config](../../../reference/configuration/manager.md). + +> Notice: `yourdomain.crt` is Harbor's ca.crt. + +```shell +job: + # Preheat configuration. + preheat: + tls: + # insecureSkipVerify controls whether a client verifies the server's certificate chain and hostname. + insecureSkipVerify: false + # # caCert is the CA certificate for preheat tls handshake, it can be path or PEM format string. + caCert: /etc/certs/yourdomain.crt +``` + +Skip TLS verification, set `job.preheat.tls.insecureSkipVerify` to true. + +```shell +job: + # Preheat configuration. + preheat: + tls: + # insecureSkipVerify controls whether a client verifies the server's certificate chain and hostname. + insecureSkipVerify: true + # # caCert is the CA certificate for preheat tls handshake, it can be path or PEM format string. + # caCert: '' +``` + +##### Setup Dfdaemon as Seed Peer and configure self-signed certificate + +Configure `dfdaemon.yaml`, the default path is `/etc/dragonfly/dfdaemon.yaml`, +refer to [dfdaemon config](../../../reference/configuration/client/dfdaemon.md). + +```shell +manager: + addr: http://dragonfly-manager:65003 +seedPeer: + enable: true + type: super + clusterID: 1 +proxy: + registryMirror: + # addr is the default address of the registry mirror. Proxy will start a registry mirror service for the + # client to pull the image. The client can use the default address of the registry mirror in + # configuration to pull the image. The `X-Dragonfly-Registry` header can instead of the default address + # of registry mirror. + addr: https://yourdomain.com + ## cert is the client cert path with PEM format for the registry. 
+ ## If registry use self-signed cert, the client should set the + ## cert for the registry mirror. + cert: /etc/certs/yourdomain.crt +``` + +##### Setup Dfdaemon as Peer and configure self-signed certificate + +Configure `dfdaemon.yaml`, the default path is `/etc/dragonfly/dfdaemon.yaml`, +refer to [dfdaemon config](../../../reference/configuration/client/dfdaemon.md). + +```shell +manager: + addr: http://dragonfly-manager:65003 +proxy: + registryMirror: + # addr is the default address of the registry mirror. Proxy will start a registry mirror service for the + # client to pull the image. The client can use the default address of the registry mirror in + # configuration to pull the image. The `X-Dragonfly-Registry` header can instead of the default address + # of registry mirror. + addr: https://yourdomain.com + ## cert is the client cert path with PEM format for the registry. + ## If registry use self-signed cert, the client should set the + ## cert for the registry mirror. + cert: /etc/certs/yourdomain.crt +``` + +##### Configure CRI-O self-signed certificate + +A custom TLS configuration for a container registry can be configured by creating a directory under `/etc/containers/certs.d`. +The name of the directory must correspond to the host:port of the registry (e.g., yourdomain.com:port), +refer to [containers-certs.d](https://github.com/containers/image/blob/main/docs/containers-certs.d.5.md). + +```shell +cp yourdomain.com.cert /etc/containers/certs.d/yourdomain.com/ +cp yourdomain.com.key /etc/containers/certs.d/yourdomain.com/ +cp ca.crt /etc/containers/certs.d/yourdomain.com/ +``` + +The following example illustrates a configuration that uses custom certificates. 
+ +```shell +/etc/containers/certs.d/ <- Certificate directory +└── yourdomain.com:port <- Hostname:port + ├── yourdomain.com.cert <- Harbor certificate + ├── yourdomain.com.key <- Harbor key + └── ca.crt <- Certificate authority that signed the registry certificate +``` + +Modify your `registries.conf` (default location: `/etc/containers/registries.conf`), refer to [containers-registries.conf](https://github.com/containers/image/blob/main/docs/containers-registries.conf.5.md). + +> Notice: `yourdomain.com` is the Harbor service address. + +```toml +[[registry]] +prefix = "yourdomain.com" +location = "yourdomain.com" + +[[registry.mirror]] +location = "127.0.0.1:4001" +``` + +To bypass the TLS verification for a private registry at `yourdomain.com`. + +```toml +[[registry]] +prefix = "yourdomain.com" +location = "yourdomain.com" + +[[registry.mirror]] +insecure = true +location = "127.0.0.1:4001" +``` + +Restart crio: + +```shell +systemctl restart crio +``` + +#### CRI-O downloads harbor images through Dragonfly + +```shell +crictl pull yourdomain.com/alpine:3.19 +``` diff --git a/versioned_docs/version-v2.2.0/operations/integrations/container-runtime/docker.md b/versioned_docs/version-v2.2.0/operations/integrations/container-runtime/docker.md new file mode 100644 index 00000000..7b8d3ec1 --- /dev/null +++ b/versioned_docs/version-v2.2.0/operations/integrations/container-runtime/docker.md @@ -0,0 +1,7 @@ +--- +id: docker +title: Docker +slug: /operations/integrations/container-runtime/docker/ +--- + +Documentation for setting Dragonfly's container runtime to Docker. Dragonfly v2.2.0 drops support for `Docker`. 
diff --git a/versioned_docs/version-v2.2.0/operations/integrations/container-runtime/nydus.md b/versioned_docs/version-v2.2.0/operations/integrations/container-runtime/nydus.md new file mode 100644 index 00000000..8860fabd --- /dev/null +++ b/versioned_docs/version-v2.2.0/operations/integrations/container-runtime/nydus.md @@ -0,0 +1,510 @@ +--- +id: nydus +title: Nydus +slug: /operations/integrations/container-runtime/nydus/ +--- + +This document will help you experience how to use Dragonfly & Nydus. + +## Prerequisites {#prerequisites} + + + +| Name | Version | Document | +| ------------------ | ------- | ----------------------------------------------------------- | +| Kubernetes cluster | 1.20+ | [kubernetes.io](https://kubernetes.io/) | +| Helm | 3.8.0+ | [helm.sh](https://helm.sh/) | +| containerd | v1.4.3+ | [containerd.io](https://containerd.io/) | +| Nerdctl | 0.22+ | [containerd/nerdctl](https://github.com/containerd/nerdctl) | + + + +## Install Nydus with Helm + +We **recommend** using helm to install Nydus, please refer to [Install Dragonfly & Nydus with Helm](https://github.com/dragonflyoss/helm-charts/blob/main/INSTALL.md). + +## Install Nydus with Binaries + +### Dragonfly Kubernetes Cluster Setup {#dragonfly-kubernetes-cluster-setup} + +For detailed installation documentation based on kubernetes cluster, please refer to [quick-start-kubernetes](../../../getting-started/quick-start/kubernetes.md). + +#### Setup kubernetes cluster {#setup-kubernetes-cluster} + +[Kind](https://kind.sigs.k8s.io/) is recommended if no Kubernetes cluster is available for testing. 
+ +Create kind multi-node cluster configuration file `kind-config.yaml`, configuration content is as follows: + +```yaml +kind: Cluster +apiVersion: kind.x-k8s.io/v1alpha4 +nodes: + - role: control-plane + - role: worker + extraPortMappings: + - containerPort: 30950 + hostPort: 4001 + - containerPort: 30951 + hostPort: 4003 + - role: worker +``` + +Create a kind multi-node cluster using the configuration file: + +```shell +kind create cluster --config kind-config.yaml +``` + +Switch the context of kubectl to kind cluster: + +```shell +kubectl config use-context kind-kind +``` + +#### Kind loads Dragonfly image {#kind-loads-dragonfly-image} + +Pull Dragonfly latest images: + +```shell +docker pull dragonflyoss/scheduler:latest +docker pull dragonflyoss/manager:latest +docker pull dragonflyoss/client:latest +``` + +Kind cluster loads Dragonfly latest images: + +```shell +kind load docker-image dragonflyoss/scheduler:latest +kind load docker-image dragonflyoss/manager:latest +kind load docker-image dragonflyoss/client:latest +``` + +#### Create Dragonfly cluster based on helm charts {#create-dragonfly-cluster-based-on-helm-charts} + +Create helm charts configuration file `charts-config.yaml` and enable prefetching, configuration content is as follows: + +```yaml +manager: + image: + repository: dragonflyoss/manager + tag: latest + metrics: + enable: true + config: + verbose: true + pprofPort: 18066 + +scheduler: + image: + repository: dragonflyoss/scheduler + tag: latest + metrics: + enable: true + config: + verbose: true + pprofPort: 18066 + +seedClient: + image: + repository: dragonflyoss/client + tag: latest + metrics: + enable: true + config: + verbose: true + proxy: + prefetch: true + +client: + image: + repository: dragonflyoss/client + tag: latest + hostNetwork: true + metrics: + enable: true + config: + verbose: true + proxy: + prefetch: true + server: + port: 4001 + registryMirror: + addr: https://index.docker.io + rules: + - regex: 'blobs/sha256.*' +``` + 
+Create a Dragonfly cluster using the configuration file: + + + +```shell +$ helm repo add dragonfly https://dragonflyoss.github.io/helm-charts/ +$ helm install --wait --create-namespace --namespace dragonfly-system dragonfly dragonfly/dragonfly -f charts-config.yaml +NAME: dragonfly +LAST DEPLOYED: Mon May 27 19:56:34 2024 +NAMESPACE: dragonfly-system +STATUS: deployed +REVISION: 1 +TEST SUITE: None +NOTES: +1. Get the scheduler address by running these commands: + export SCHEDULER_POD_NAME=$(kubectl get pods --namespace dragonfly-system -l "app=dragonfly,release=dragonfly,component=scheduler" -o jsonpath={.items[0].metadata.name}) + export SCHEDULER_CONTAINER_PORT=$(kubectl get pod --namespace dragonfly-system $SCHEDULER_POD_NAME -o jsonpath="{.spec.containers[0].ports[0].containerPort}") + kubectl --namespace dragonfly-system port-forward $SCHEDULER_POD_NAME 8002:$SCHEDULER_CONTAINER_PORT + echo "Visit http://127.0.0.1:8002 to use your scheduler" + +2. Get the dfdaemon port by running these commands: + export DFDAEMON_POD_NAME=$(kubectl get pods --namespace dragonfly-system -l "app=dragonfly,release=dragonfly,component=dfdaemon" -o jsonpath={.items[0].metadata.name}) + export DFDAEMON_CONTAINER_PORT=$(kubectl get pod --namespace dragonfly-system $DFDAEMON_POD_NAME -o jsonpath="{.spec.containers[0].ports[0].containerPort}") + You can use $DFDAEMON_CONTAINER_PORT as a proxy port in Node. + +3. 
Configure runtime to use dragonfly: + https://d7y.io/docs/getting-started/quick-start/kubernetes/ +``` + + + +Check that dragonfly is deployed successfully: + +```shell +$ kubectl get po -n dragonfly-system +NAME READY STATUS RESTARTS AGE +dragonfly-client-9rkgp 1/1 Running 1 (6h29m ago) 9h +dragonfly-client-l2czc 1/1 Running 2 (6h29m ago) 9h +dragonfly-manager-789f57fc65-t44tf 1/1 Running 2 (6h28m ago) 9h +dragonfly-mysql-0 1/1 Running 3 (6h28m ago) 9h +dragonfly-redis-master-0 1/1 Running 3 (6h28m ago) 9h +dragonfly-redis-replicas-0 1/1 Running 7 (6h28m ago) 9h +dragonfly-redis-replicas-1 1/1 Running 2 (6h28m ago) 8h +dragonfly-redis-replicas-2 1/1 Running 2 (6h28m ago) 8h +dragonfly-scheduler-0 1/1 Running 2 (6h28m ago) 9h +dragonfly-scheduler-1 1/1 Running 2 (6h28m ago) 8h +dragonfly-scheduler-2 1/1 Running 2 (6h28m ago) 8h +dragonfly-seed-client-0 1/1 Running 8 (6h27m ago) 9h +dragonfly-seed-client-1 1/1 Running 4 (6h27m ago) 8h +dragonfly-seed-client-2 1/1 Running 4 (6h27m ago) 8h +``` + +Create peer service configuration file `peer-service-config.yaml`, configuration content is as follows: + +```yaml +apiVersion: v1 +kind: Service +metadata: + name: peer + namespace: dragonfly-system +spec: + type: NodePort + ports: + - name: http-4001 + nodePort: 30950 + port: 4001 + - name: http-4003 + nodePort: 30951 + port: 4003 + selector: + app: dragonfly + component: client + release: dragonfly +``` + +Create a peer service using the configuration file: + +```shell +kubectl apply -f peer-service-config.yaml +``` + +### Nydus Setup for containerd Environment {#nydus-for-containerd-environment} + +For detailed Nydus installation documentation based on containerd environment, please refer to +[nydus-setup-for-containerd-environment](https://github.com/dragonflyoss/image-service/blob/master/docs/containerd-env-setup.md#nydus-setup-for-containerd-environment). +The example uses Systemd to manage the `nydus-snapshotter` service. 
+ +#### From the Binary Releases {#install-nydus-tools} + +Download the `Nydus Snapshotter` binaries, please refer to [nydus-snapshotter/releases](https://github.com/containerd/nydus-snapshotter/releases/latest): + +> Notice: `your_nydus_snapshotter_version` is recommended to use the latest version. + +```shell +NYDUS_SNAPSHOTTER_VERSION= +wget -O nydus-snapshotter_linux_arm64.tar.gz https://github.com/containerd/nydus-snapshotter/releases/download/v$NYDUS_SNAPSHOTTER_VERSION/nydus-snapshotter-v$NYDUS_SNAPSHOTTER_VERSION-linux-arm64.tar.gz +``` + +Untar the package: + +```shell +tar zxvf nydus-snapshotter_linux_arm64.tar.gz + +# Install executable file to /usr/local/bin/{containerd-nydus-grpc}. +sudo cp bin/containerd-nydus-grpc /usr/local/bin/ +``` + +Download the `Nydus Image Service` binaries, please refer to [dragonflyoss/image-service](https://github.com/dragonflyoss/image-service/releases/latest): + +> Notice: `your_nydus_version` is recommended to use the latest version. + +```shell +NYDUS_VERSION= +wget -O nydus-image-service-linux-arm64.tgz https://github.com/dragonflyoss/image-service/releases/download/v$NYDUS_VERSION/nydus-static-v$NYDUS_VERSION-linux-arm64.tgz +``` + +Untar the package: + +```shell +tar zxvf nydus-image-service-linux-arm64.tgz + +# Install executable file to /usr/local/bin/{nydus-image,nydusd,nydusify}. +sudo cp nydus-static/nydus-image nydus-static/nydusd nydus-static/nydusify /usr/local/bin/ +``` + +#### Install Nydus Snapshotter plugin for containerd {#install-nydus-snapshotter-plugin-for-containerd} + +Modify your `config.toml` (default location: `/etc/containerd/config.toml`), please refer to +[configure-and-start-containerd](https://github.com/dragonflyoss/image-service/blob/master/docs/containerd-env-setup.md#configure-and-start-containerd). 
+ +```toml +[proxy_plugins] + [proxy_plugins.nydus] + type = "snapshot" + address = "/run/containerd-nydus/containerd-nydus-grpc.sock" + +[plugins.cri] + [plugins.cri.containerd] + snapshotter = "nydus" + disable_snapshot_annotations = false +``` + +Restart containerd: + +```shell +sudo systemctl restart containerd +``` + +Check that containerd uses the `nydus-snapshotter` plugin: + +```shell +$ ctr -a /run/containerd/containerd.sock plugin ls | grep nydus +io.containerd.snapshotter.v1 nydus - ok +``` + +#### Systemd starts Nydus Snapshotter {#systemd-starts-snapshotter-service} + +Create the Nydusd configuration file `nydusd-config.json`. +Please refer to the [Nydus Mirror](https://github.com/dragonflyoss/image-service/blob/master/docs/nydusd.md#enable-mirrors-for-storage-backend) +documentation for details. + +Set the `backend.config.mirrors.host` and `backend.config.mirrors.ping_url` +address in the configuration file to your actual address. Configuration content is as follows: + +```json +{ + "device": { + "backend": { + "type": "registry", + "config": { + "mirrors": [ + { + "host": "http://127.0.0.1:4001", + "auth_through": false, + "headers": { + "X-Dragonfly-Registry": "https://index.docker.io" + }, + "ping_url": "http://127.0.0.1:4003/healthy" + } + ], + "scheme": "https", + "skip_verify": true, + "timeout": 10, + "connect_timeout": 10, + "retry_limit": 2 + } + }, + "cache": { + "type": "blobcache", + "config": { + "work_dir": "/var/lib/nydus/cache/" + } + } + }, + "mode": "direct", + "digest_validate": false, + "iostats_files": false, + "enable_xattr": true, + "fs_prefetch": { + "enable": true, + "threads_count": 10, + "merging_size": 131072, + "bandwidth_rate": 1048576 + } +} +``` + +Copy configuration file to `/etc/nydus/config.json`: + +```shell +sudo mkdir /etc/nydus && cp nydusd-config.json /etc/nydus/config.json +``` + +Create systemd configuration file `nydus-snapshotter.service` of Nydus snapshotter, configuration content is as follows: + +```text 
+[Unit] +Description=nydus snapshotter +After=network.target +Before=containerd.service + +[Service] +Type=simple +Environment=HOME=/root +ExecStart=/usr/local/bin/containerd-nydus-grpc --nydusd-config /etc/nydus/config.json +Restart=always +RestartSec=1 +KillMode=process +OOMScoreAdjust=-999 +StandardOutput=journal +StandardError=journal + +[Install] +WantedBy=multi-user.target +``` + +Copy configuration file to `/etc/systemd/system/`: + +```shell +sudo cp nydus-snapshotter.service /etc/systemd/system/ +``` + +Systemd starts nydus snapshotter service: + + + +```shell +$ sudo systemctl enable nydus-snapshotter +$ sudo systemctl start nydus-snapshotter +$ sudo systemctl status nydus-snapshotter +● nydus-snapshotter.service - nydus snapshotter + Loaded: loaded (/etc/systemd/system/nydus-snapshotter.service; enabled; vendor preset: enabled) + Active: active (running) since Wed 2022-10-19 08:01:00 UTC; 2s ago + Main PID: 2853636 (containerd-nydu) + Tasks: 9 (limit: 37574) + Memory: 4.6M + CPU: 20ms + CGroup: /system.slice/nydus-snapshotter.service + └─2853636 /usr/local/bin/containerd-nydus-grpc --config-path /etc/nydus/config.json + +Oct 19 08:01:00 kvm-gaius-0 systemd[1]: Started nydus snapshotter. +Oct 19 08:01:00 kvm-gaius-0 containerd-nydus-grpc[2853636]: time="2022-10-19T08:01:00.493700269Z" level=info msg="gc goroutine start..." +Oct 19 08:01:00 kvm-gaius-0 containerd-nydus-grpc[2853636]: time="2022-10-19T08:01:00.493947264Z" level=info msg="found 0 daemons running" +``` + + + +#### Convert an image to Nydus image {#convert-an-image-to-nydus-image} + +Convert `alpine:3.19` image to Nydus image, +Conversion tool can use [nydusify](https://github.com/dragonflyoss/image-service/blob/master/docs/nydusify.md) and [acceld](https://github.com/goharbor/acceleration-service). 
+ +Login to Dockerhub: + +```shell +docker login +``` + +Convert `alpine:3.19` image to Nydus image, and `DOCKERHUB_REPO_NAME` environment variable +needs to be set to the user's image repository: + +```shell +DOCKERHUB_REPO_NAME= +sudo nydusify convert --nydus-image /usr/local/bin/nydus-image --source alpine:3.19 --target $DOCKERHUB_REPO_NAME/alpine:3.19-nydus +``` + +#### Nydus downloads images through Dragonfly {#nydus-downloads-images-through-dragonfly} + +Running `alpine:3.19-nydus` with nerdctl: + +```shell +sudo nerdctl --snapshotter nydus run --rm -it $DOCKERHUB_REPO_NAME/alpine:3.19-nydus +``` + + + +#### Verify + +Check that Nydus is downloaded via Dragonfly based on mirror mode: + +```shell +# Check Nydus logs. +grep mirrors /var/lib/containerd-nydus/logs/**/*log +``` + +The expected output is as follows: + +```shell +[2024-05-28 12:36:24.834434 +00:00] INFO backend config: ConnectionConfig { proxy: ProxyConfig { url: "", ping_url: "", fallback: false, check_interval: 5, use_http: false }, mirrors: [MirrorConfig { host: "http://127.0.0.1:4001", ping_url: "http://127.0.0.1:4003/healthy", headers: {"X-Dragonfly-Registry": "https://index.docker.io"}, health_check_interval: 5, failure_limit: 5 }], skip_verify: true, timeout: 10, connect_timeout: 10, retry_limit: 2 } +``` + +You can execute the following command to check if the `alpine:3.19` image is distributed via Dragonfly. + +```shell +# Find pod name. +export POD_NAME=$(kubectl get pods --namespace dragonfly-system -l "app=dragonfly,release=dragonfly,component=client" -o=jsonpath='{.items[?(@.spec.nodeName=="kind-worker")].metadata.name}' | head -n 1 ) + +# Find task id. +export TASK_ID=$(kubectl -n dragonfly-system exec ${POD_NAME} -- sh -c "grep -hoP 'alpine.*task_id=\"\K[^\"]+' /var/log/dragonfly/dfdaemon/* | head -n 1") + +# Check logs. 
+kubectl -n dragonfly-system exec -it ${POD_NAME} -- sh -c "grep ${TASK_ID} /var/log/dragonfly/dfdaemon/* | grep 'download task succeeded'" +``` + +The expected output is as follows: + +```shell +{ + 2024-04-19T02:44:09.259458Z "INFO" + "download_task":"dragonfly-client/src/grpc/dfdaemon_download.rs:276":: "download task succeeded" + "host_id": "172.18.0.3-kind-worker", + "task_id": "a46de92fcb9430049cf9e61e267e1c3c9db1f1aa4a8680a048949b06adb625a5", + "peer_id": "172.18.0.3-kind-worker-86e48d67-1653-4571-bf01-7e0c9a0a119d" +} +``` + + + +## Performance testing {#performance-testing} + +Test the performance of single-machine image download after the integration of +`Nydus Mirror` mode and `Dragonfly P2P`. +Test running version commands using images in different languages. +For example, the startup command used to run a `python` image is `python -V`. +The tests were performed on the same machine. +Due to the influence of the network environment of the machine itself, +the actual download time is not important, but the ratio of the increase in +the download time in different scenarios is very important. + +![nydus-mirror-dragonfly](../../../resource/operations/integrations/nydus-mirror-dragonfly.png) + +- OCIv1: Use containerd to pull image directly. +- Nydus Cold Boot: Use containerd to pull image via nydus-snapshotter and doesn't hit any cache. +- Nydus & Dragonfly Cold Boot: Use containerd to pull image via nydus-snapshotter. + Transfer the traffic to Dragonfly P2P based on Nydus Mirror mode and no cache hits. +- Hit Dragonfly Remote Peer Cache: Use containerd to pull image via nydus-snapshotter. + Transfer the traffic to Dragonfly P2P based on Nydus Mirror mode and hit the remote peer cache. +- Hit Dragonfly Local Peer Cache: Use containerd to pull image via nydus-snapshotter. + Transfer the traffic to Dragonfly P2P based on Nydus Mirror mode and hit the local peer cache. +- Hit Nydus Cache: Use containerd to pull image via nydus-snapshotter. 
+  Transfer the traffic to Dragonfly P2P based on Nydus Mirror mode and hit the nydus local cache.
+
+Test results show `Nydus Mirror` mode and `Dragonfly P2P` integration.
+Compared with the `OCIv1` mode, using `Nydus` to download images
+can effectively reduce the image download time.
+The cold boot times of `Nydus` and `Nydus & Dragonfly` are basically close.
+All hits to `Dragonfly` cache are better than `Nydus` only.
+The most important thing is that if a very large `kubernetes` cluster uses `Nydus` to pull images,
+the download of each image layer will generate as many range requests as needed,
+which causes the `QPS` of the source registry to be relatively high.
+Dragonfly can effectively reduce the number of requests and
+download traffic for the back-to-source registry.
+In the best case, `Dragonfly` can make the same task back-to-source download only once.
diff --git a/versioned_docs/version-v2.2.0/operations/integrations/container-runtime/podman.md b/versioned_docs/version-v2.2.0/operations/integrations/container-runtime/podman.md
new file mode 100644
index 00000000..a4c92f38
--- /dev/null
+++ b/versioned_docs/version-v2.2.0/operations/integrations/container-runtime/podman.md
@@ -0,0 +1,513 @@
+---
+id: podman
+title: Podman
+slug: /operations/integrations/container-runtime/podman/
+---
+
+Documentation for setting Dragonfly's container runtime to Podman.
+
+## Prerequisites {#prerequisites}
+
+| Name               | Version | Document                                |
+| ------------------ | ------- | --------------------------------------- |
+| Kubernetes cluster | 1.20+   | [kubernetes.io](https://kubernetes.io/) |
+| Helm               | v3.8.0+ | [helm.sh](https://helm.sh/)             |
+| Podman             | v1.5.0+ | [podman.io](https://podman.io/)         |
+
+## Quick Start {#quick-start}
+
+### Setup kubernetes cluster {#setup-kubernetes-cluster}
+
+[Minikube](https://minikube.sigs.k8s.io/docs) is recommended if no Kubernetes cluster is available for testing.
+
+Create a Minikube cluster.
+ +```shell +minikube start --driver=podman --container-runtime=cri-o +``` + +Switch the context of kubectl to minikube cluster: + +```shell +kubectl config use-context minikube +``` + +### Minikube loads Dragonfly image {#minikube-loads-dragonfly-image} + +Pull Dragonfly latest images: + +```shell +docker pull dragonflyoss/scheduler:latest +docker pull dragonflyoss/manager:latest +docker pull dragonflyoss/client:latest +docker pull dragonflyoss/dfinit:latest +``` + +Minikube cluster loads Dragonfly latest images: + +```shell +minikube image load dragonflyoss/scheduler:latest +minikube image load dragonflyoss/manager:latest +minikube image load dragonflyoss/client:latest +minikube image load dragonflyoss/dfinit:latest +``` + +### Create Dragonfly cluster based on helm charts {#create-dragonfly-cluster-based-on-helm-charts} + +Create the Helm Charts configuration file `values.yaml`. Please refer to the +[configuration](https://artifacthub.io/packages/helm/dragonfly/dragonfly#values) documentation for details. 
+ +```yaml +manager: + image: + repository: dragonflyoss/manager + tag: latest + metrics: + enable: true + config: + verbose: true + pprofPort: 18066 + +scheduler: + image: + repository: dragonflyoss/scheduler + tag: latest + metrics: + enable: true + config: + verbose: true + pprofPort: 18066 + +seedClient: + image: + repository: dragonflyoss/client + tag: latest + metrics: + enable: true + config: + verbose: true + +client: + image: + repository: dragonflyoss/client + tag: latest + metrics: + enable: true + config: + verbose: true + dfinit: + enable: true + image: + repository: dragonflyoss/dfinit + tag: latest + config: + containerRuntime: + containerd: null + podman: + configPath: /etc/containers/registries.conf + unqualifiedSearchRegistries: ['registry.fedoraproject.org', 'registry.access.redhat.com', 'docker.io'] + registries: + - prefix: docker.io + location: docker.io +``` + +Create a Dragonfly cluster using the configuration file: + + + +```shell +$ helm repo add dragonfly https://dragonflyoss.github.io/helm-charts/ +$ helm install --create-namespace --namespace dragonfly-system dragonfly dragonfly/dragonfly -f values.yaml +NAME: dragonfly +LAST DEPLOYED: Mon Apr 28 10:59:19 2024 +NAMESPACE: dragonfly-system +STATUS: deployed +REVISION: 1 +TEST SUITE: None +NOTES: +1. Get the scheduler address by running these commands: + export SCHEDULER_POD_NAME=$(kubectl get pods --namespace dragonfly-system -l "app=dragonfly,release=dragonfly,component=scheduler" -o jsonpath={.items[0].metadata.name}) + export SCHEDULER_CONTAINER_PORT=$(kubectl get pod --namespace dragonfly-system $SCHEDULER_POD_NAME -o jsonpath="{.spec.containers[0].ports[0].containerPort}") + kubectl --namespace dragonfly-system port-forward $SCHEDULER_POD_NAME 8002:$SCHEDULER_CONTAINER_PORT + echo "Visit http://127.0.0.1:8002 to use your scheduler" + +2. 
Get the dfdaemon port by running these commands: + export DFDAEMON_POD_NAME=$(kubectl get pods --namespace dragonfly-system -l "app=dragonfly,release=dragonfly,component=dfdaemon" -o jsonpath={.items[0].metadata.name}) + export DFDAEMON_CONTAINER_PORT=$(kubectl get pod --namespace dragonfly-system $DFDAEMON_POD_NAME -o jsonpath="{.spec.containers[0].ports[0].containerPort}") + You can use $DFDAEMON_CONTAINER_PORT as a proxy port in Node. + +3. Configure runtime to use dragonfly: + https://d7y.io/docs/getting-started/quick-start/kubernetes/ +``` + + + +Check that Dragonfly is deployed successfully: + +```shell +$ kubectl get po -n dragonfly-system +NAME READY STATUS RESTARTS AGE +dragonfly-client-54vm5 1/1 Running 0 37m +dragonfly-client-cvbln 1/1 Running 0 37m +dragonfly-manager-864774f54d-njdhx 1/1 Running 0 37m +dragonfly-mysql-0 1/1 Running 0 37m +dragonfly-redis-master-0 1/1 Running 0 37m +dragonfly-redis-replicas-0 1/1 Running 0 37m +dragonfly-redis-replicas-1 1/1 Running 0 5m10s +dragonfly-redis-replicas-2 1/1 Running 0 4m44s +dragonfly-scheduler-0 1/1 Running 0 37m +dragonfly-seed-client-0 1/1 Running 2 (27m ago) 37m +``` + +### Podman downloads images through Dragonfly {#crio-downloads-images-through-dragonfly} + +Pull `alpine:3.19` image in minikube node: + +```shell +docker exec -i minikube /usr/bin/podman pull alpine:3.19 +``` + +#### Verify {#verify} + +You can execute the following command to check if the `alpine:3.19` image is distributed via Dragonfly. + + + +```shell +# Find pod name. +export POD_NAME=$(kubectl get pods --namespace dragonfly-system -l "app=dragonfly,release=dragonfly,component=client" -o=jsonpath='{.items[?(@.spec.nodeName=="minikube")].metadata.name}' | head -n 1 ) + +# Find peer id. +export TASK_ID=$(kubectl -n dragonfly-system exec ${POD_NAME} -- sh -c "grep -hoP 'library/alpine.*task_id=\"\K[^\"]+' /var/log/dragonfly/dfdaemon/* | head -n 1") + +# Check logs. 
+kubectl -n dragonfly-system exec -it ${POD_NAME} -- sh -c "grep ${TASK_ID} /var/log/dragonfly/dfdaemon/* | grep 'download task succeeded'" +``` + + + +The expected output is as follows: + +```shell +{ + 2024-04-19T02:44:09.259458Z INFO + "download_task":"dragonfly-client/src/grpc/dfdaemon_download.rs:276":: "download task succeeded" + "host_id": "172.18.0.3-minikube", + "task_id": "a46de92fcb9430049cf9e61e267e1c3c9db1f1aa4a8680a048949b06adb625a5", + "peer_id": "172.18.0.3-minikube-86e48d67-1653-4571-bf01-7e0c9a0a119d" +} +``` + +## More configurations + +### Container Registry using self-signed certificates + +Use Harbor as an example of a container registry using self-signed certificates. +Harbor generates self-signed certificate, refer to [Harbor](https://goharbor.io/docs/2.11.0/install-config/configure-https/). + +#### Install Dragonfly with Helm Charts + +##### Create self-signed certificate secret for Seed Peer + +Create seed client secret configuration file `seed-client-secret.yaml`, configuration content is as follows: + +> Notice: yourdomain.crt is Harbor's ca.crt. + +```yaml +apiVersion: v1 +kind: Secret +metadata: + name: seed-client-secret + namespace: dragonfly-system +type: Opaque +data: + # the data is abbreviated in this example. + yourdomain.crt: | + MIIFwTCCA6mgAwIBAgIUdgmYyNCw4t+Lp/... +``` + +Create the secret through the following command: + +```shell +kubectl apply -f seed-client-secret.yaml +``` + +##### Create self-signed certificate secret for Peer + +Create client secret configuration file `client-secret.yaml`, configuration content is as follows: + +> Notice: yourdomain.crt is Harbor's ca.crt. + +```yaml +apiVersion: v1 +kind: Secret +metadata: + name: client-secret + namespace: dragonfly-system +type: Opaque +data: + # the data is abbreviated in this example. + yourdomain.crt: | + MIIFwTCCA6mgAwIBAgIUdgmYyNCw4t+Lp/... 
+```
+
+Create the secret through the following command:
+
+```shell
+kubectl apply -f client-secret.yaml
+```
+
+##### Create Dragonfly cluster based on helm charts {#harbor-create-dragonfly-cluster-based-on-helm-charts}
+
+Create helm charts configuration file `values.yaml`, configuration content is as follows:
+
+- Support preheating for harbor with self-signed certificates,
+  you need to change the `manager.config.job.preheat.tls` configuration,
+  `/etc/certs/yourdomain.crt` is the harbor self-signed certificate configuration file.
+  If you want to bypass TLS verification, please set `insecureSkipVerify` to `true`.
+
+- Support dragonfly as the registry mirror of Podman for harbor with self-signed certificates,
+  you need to change the `client.config.proxy.registryMirror` configuration and
+  `seedClient.config.proxy.registryMirror` configuration,
+  `https://yourdomain.com` is the harbor service address,
+  `/etc/certs/yourdomain.crt` is the harbor self-signed certificate configuration file.
+
+- Set the configuration of Podman for harbor with self-signed certificates,
+  you need to change the `client.dfinit.config.containerRuntime.podman.registries` configuration,
+  `yourdomain.com` is the harbor registry host address. Podman loads registry certificates
+  from `/etc/containers/certs.d`, refer to the Podman self-signed certificate section below.
+ +```yaml +manager: + image: + repository: dragonflyoss/manager + tag: latest + metrics: + enable: true + config: + verbose: true + pprofPort: 18066 + job: + preheat: + tls: + insecureSkipVerify: false + caCert: /etc/certs/yourdomain.crt + extraVolumes: + - name: client-secret + secret: + secretName: client-secret + extraVolumeMounts: + - name: client-secret + mountPath: /etc/certs + +scheduler: + image: + repository: dragonflyoss/scheduler + tag: latest + metrics: + enable: true + config: + verbose: true + pprofPort: 18066 + +seedClient: + image: + repository: dragonflyoss/client + tag: latest + metrics: + enable: true + config: + verbose: true + proxy: + registryMirror: + addr: https://yourdomain.com + cert: /etc/certs/yourdomain.crt + extraVolumes: + - name: seed-client-secret + secret: + secretName: seed-client-secret + extraVolumeMounts: + - name: seed-client-secret + mountPath: /etc/certs + +client: + image: + repository: dragonflyoss/client + tag: latest + metrics: + enable: true + config: + verbose: true + proxy: + registryMirror: + addr: https://yourdomain.com + cert: /etc/certs/yourdomain.crt + extraVolumes: + - name: client-secret + secret: + secretName: client-secret + extraVolumeMounts: + - name: client-secret + mountPath: /etc/certs + dfinit: + enable: true + image: + repository: dragonflyoss/dfinit + tag: latest + config: + containerRuntime: + containerd: null + podman: + configPath: /etc/containers/registries.conf + unqualifiedSearchRegistries: ['registry.fedoraproject.org', 'registry.access.redhat.com', 'docker.io'] + registries: + - prefix: yourdomain.com + location: yourdomain.com +``` + +#### Install Dragonfly with Binaries + +Copy Harbor's ca.crt file to `/etc/containers/certs.d/yourdomain.crt`. + +```shell +cp ca.crt /etc/containers/certs.d/yourdomain.crt +``` + +Install Dragonfly with Binaries, refer to [Binaries](../../../getting-started/installation/binaries.md). 
+ +##### Setup Manager and configure self-signed certificate + +To support preheating for harbor with self-signed certificates, the Manager configuration needs to be modified. + +Configure `manager.yaml`, the default path is `/etc/dragonfly/manager.yaml`, +refer to [manager config](../../../reference/configuration/manager.md). + +> Notice: `yourdomain.crt` is Harbor's ca.crt. + +```shell +job: + # Preheat configuration. + preheat: + tls: + # insecureSkipVerify controls whether a client verifies the server's certificate chain and hostname. + insecureSkipVerify: false + # # caCert is the CA certificate for preheat tls handshake, it can be path or PEM format string. + caCert: /etc/certs/yourdomain.crt +``` + +Skip TLS verification, set `job.preheat.tls.insecureSkipVerify` to true. + +```shell +job: + # Preheat configuration. + preheat: + tls: + # insecureSkipVerify controls whether a client verifies the server's certificate chain and hostname. + insecureSkipVerify: true + # # caCert is the CA certificate for preheat tls handshake, it can be path or PEM format string. + # caCert: '' +``` + +##### Setup Dfdaemon as Seed Peer and configure self-signed certificate + +Configure `dfdaemon.yaml`, the default path is `/etc/dragonfly/dfdaemon.yaml`, +refer to [dfdaemon config](../../../reference/configuration/client/dfdaemon.md). + +```shell +manager: + addr: http://dragonfly-manager:65003 +seedPeer: + enable: true + type: super + clusterID: 1 +proxy: + registryMirror: + # addr is the default address of the registry mirror. Proxy will start a registry mirror service for the + # client to pull the image. The client can use the default address of the registry mirror in + # configuration to pull the image. The `X-Dragonfly-Registry` header can instead of the default address + # of registry mirror. + addr: https://yourdomain.com + ## cert is the client cert path with PEM format for the registry. 
+ ## If registry use self-signed cert, the client should set the + ## cert for the registry mirror. + cert: /etc/certs/yourdomain.crt +``` + +##### Setup Dfdaemon as Peer and configure self-signed certificate + +Configure `dfdaemon.yaml`, the default path is `/etc/dragonfly/dfdaemon.yaml`, +refer to [dfdaemon config](../../../reference/configuration/client/dfdaemon.md). + +```shell +manager: + addr: http://dragonfly-manager:65003 +proxy: + registryMirror: + # addr is the default address of the registry mirror. Proxy will start a registry mirror service for the + # client to pull the image. The client can use the default address of the registry mirror in + # configuration to pull the image. The `X-Dragonfly-Registry` header can instead of the default address + # of registry mirror. + addr: https://yourdomain.com + ## cert is the client cert path with PEM format for the registry. + ## If registry use self-signed cert, the client should set the + ## cert for the registry mirror. + cert: /etc/certs/yourdomain.crt +``` + +##### Configure Podman self-signed certificate + +A custom TLS configuration for a container registry can be configured by creating a directory under `/etc/containers/certs.d`. +The name of the directory must correspond to the host:port of the registry (e.g., yourdomain.com:port), +refer to [containers-certs.d](https://github.com/containers/image/blob/main/docs/containers-certs.d.5.md). + +```shell +cp yourdomain.com.cert /etc/containers/certs.d/yourdomain.com/ +cp yourdomain.com.key /etc/containers/certs.d/yourdomain.com/ +cp ca.crt /etc/containers/certs.d/yourdomain.com/ +``` + +The following example illustrates a configuration that uses custom certificates. 
+ +```shell +/etc/containers/certs.d/ <- Certificate directory +└── yourdomain.com:port <- Hostname:port + ├── yourdomain.com.cert <- Harbor certificate + ├── yourdomain.com.key <- Harbor key + └── ca.crt <- Certificate authority that signed the registry certificate +``` + +Modify your `registries.conf` (default location: `/etc/containers/registries.conf`), refer to [containers-registries.conf](https://github.com/containers/image/blob/main/docs/containers-registries.conf.5.md). + +> Notice: `yourdomain.com` is the Harbor service address. + +```toml +[[registry]] +prefix = "yourdomain.com" +location = "yourdomain.com" + +[[registry.mirror]] +location = "127.0.0.1:4001" +``` + +To bypass the TLS verification for a private registry at `yourdomain.com`. + +```toml +[[registry]] +prefix = "yourdomain.com" +location = "yourdomain.com" + +[[registry.mirror]] +insecure = true +location = "127.0.0.1:4001" +``` + +Restart podman: + +```shell +systemctl restart crio +``` + +#### Podman downloads harbor images through Dragonfly + +```shell +podman pull yourdomain.com/alpine:3.19 +``` diff --git a/versioned_docs/version-v2.2.0/operations/integrations/container-runtime/singularity.md b/versioned_docs/version-v2.2.0/operations/integrations/container-runtime/singularity.md new file mode 100644 index 00000000..e48d58a5 --- /dev/null +++ b/versioned_docs/version-v2.2.0/operations/integrations/container-runtime/singularity.md @@ -0,0 +1,6 @@ +--- +id: singularity +title: Singularity/Apptainer +--- + +Documentation for setting Dragonfly's container runtime to Singularity/Apptainer. Dragonfly v2.2.0 drops support for `Singularity/Apptainer`. 
diff --git a/versioned_docs/version-v2.2.0/operations/integrations/container-runtime/stargz.md b/versioned_docs/version-v2.2.0/operations/integrations/container-runtime/stargz.md new file mode 100644 index 00000000..2b0c3a4c --- /dev/null +++ b/versioned_docs/version-v2.2.0/operations/integrations/container-runtime/stargz.md @@ -0,0 +1,389 @@ +--- +id: stargz +title: eStargz +slug: /operations/integrations/container-runtime/stargz/ +--- + +This document will help you experience how to use Dragonfly with eStargz. + +## Prerequisites {#prerequisites} + + + +| Name | Version | Document | +| ------------------ | ------- | ----------------------------------------------------------- | +| Kubernetes cluster | 1.20+ | [kubernetes.io](https://kubernetes.io/) | +| Helm | 3.8.0+ | [helm.sh](https://helm.sh/) | +| containerd | v1.4.3+ | [containerd.io](https://containerd.io/) | +| Nerdctl | 0.22+ | [containerd/nerdctl](https://github.com/containerd/nerdctl) | + + + +## Dragonfly Kubernetes Cluster Setup {#dragonfly-kubernetes-cluster-setup} + +For detailed installation documentation based on kubernetes cluster, please refer to [quick-start-kubernetes](../../../getting-started/quick-start/kubernetes.md). + +### Setup kubernetes cluster {#setup-kubernetes-cluster} + +[Kind](https://kind.sigs.k8s.io/) is recommended if no Kubernetes cluster is available for testing. 
+ +Create kind multi-node cluster configuration file `kind-config.yaml`, configuration content is as follows: + +```yaml +kind: Cluster +apiVersion: kind.x-k8s.io/v1alpha4 +nodes: + - role: control-plane + - role: worker + extraPortMappings: + - containerPort: 30950 + hostPort: 4001 + - containerPort: 30951 + hostPort: 4003 + - role: worker +``` + +Create a kind multi-node cluster using the configuration file: + +```shell +kind create cluster --config kind-config.yaml +``` + +Switch the context of kubectl to kind cluster: + +```shell +kubectl config use-context kind-kind +``` + +### Kind loads Dragonfly image {#kind-loads-dragonfly-image} + +Pull Dragonfly latest images: + +```shell +docker pull dragonflyoss/scheduler:latest +docker pull dragonflyoss/manager:latest +docker pull dragonflyoss/client:latest +``` + +Kind cluster loads Dragonfly latest images: + +```shell +kind load docker-image dragonflyoss/scheduler:latest +kind load docker-image dragonflyoss/manager:latest +kind load docker-image dragonflyoss/client:latest +``` + +### Create Dragonfly cluster based on helm charts {#create-dragonfly-cluster-based-on-helm-charts} + +Create helm charts configuration file `charts-config.yaml` and enable prefetching, configuration content is as follows: + +```yaml +manager: + image: + repository: dragonflyoss/manager + tag: latest + metrics: + enable: true + config: + verbose: true + pprofPort: 18066 + +scheduler: + image: + repository: dragonflyoss/scheduler + tag: latest + metrics: + enable: true + config: + verbose: true + pprofPort: 18066 + +seedClient: + image: + repository: dragonflyoss/client + tag: latest + metrics: + enable: true + config: + verbose: true + proxy: + prefetch: true + +client: + image: + repository: dragonflyoss/client + tag: latest + hostNetwork: true + metrics: + enable: true + config: + verbose: true + proxy: + prefetch: true + server: + port: 4001 + registryMirror: + addr: https://index.docker.io + rules: + - regex: 'blobs/sha256.*' +``` + 
+Create a Dragonfly cluster using the configuration file: + + + +```shell +$ helm repo add dragonfly https://dragonflyoss.github.io/helm-charts/ +$ helm install --wait --create-namespace --namespace dragonfly-system dragonfly dragonfly/dragonfly -f charts-config.yaml +NAME: dragonfly +LAST DEPLOYED: Mon May 28 20:52:12 2024 +NAMESPACE: dragonfly-system +STATUS: deployed +REVISION: 1 +TEST SUITE: None +NOTES: +1. Get the scheduler address by running these commands: + export SCHEDULER_POD_NAME=$(kubectl get pods --namespace dragonfly-system -l "app=dragonfly,release=dragonfly,component=scheduler" -o jsonpath={.items[0].metadata.name}) + export SCHEDULER_CONTAINER_PORT=$(kubectl get pod --namespace dragonfly-system $SCHEDULER_POD_NAME -o jsonpath="{.spec.containers[0].ports[0].containerPort}") + kubectl --namespace dragonfly-system port-forward $SCHEDULER_POD_NAME 8002:$SCHEDULER_CONTAINER_PORT + echo "Visit http://127.0.0.1:8002 to use your scheduler" + +2. Get the dfdaemon port by running these commands: + export DFDAEMON_POD_NAME=$(kubectl get pods --namespace dragonfly-system -l "app=dragonfly,release=dragonfly,component=dfdaemon" -o jsonpath={.items[0].metadata.name}) + export DFDAEMON_CONTAINER_PORT=$(kubectl get pod --namespace dragonfly-system $DFDAEMON_POD_NAME -o jsonpath="{.spec.containers[0].ports[0].containerPort}") + You can use $DFDAEMON_CONTAINER_PORT as a proxy port in Node. + +3. 
Configure runtime to use dragonfly: + https://d7y.io/docs/getting-started/quick-start/kubernetes/ +``` + + + +Check that Dragonfly is deployed successfully: + +```shell +$ kubectl get po -n dragonfly-system +NAME READY STATUS RESTARTS AGE +dragonfly-client-5vtn2 1/1 Running 0 74m +dragonfly-client-g648f 1/1 Running 0 74m +dragonfly-manager-58ff696785-kjl8r 1/1 Running 0 74m +dragonfly-mysql-0 1/1 Running 0 74m +dragonfly-redis-master-0 1/1 Running 0 74m +dragonfly-redis-replicas-0 1/1 Running 0 74m +dragonfly-redis-replicas-1 1/1 Running 0 72m +dragonfly-redis-replicas-2 1/1 Running 0 72m +dragonfly-scheduler-0 1/1 Running 0 74m +dragonfly-scheduler-1 1/1 Running 0 66m +dragonfly-scheduler-2 1/1 Running 0 65m +dragonfly-seed-client-0 1/1 Running 4 (66m ago) 74m +dragonfly-seed-client-1 1/1 Running 0 65m +dragonfly-seed-client-2 1/1 Running 0 65m +``` + +Create peer service configuration file `peer-service-config.yaml`, configuration content is as follows: + +```yaml +apiVersion: v1 +kind: Service +metadata: + name: peer + namespace: dragonfly-system +spec: + type: NodePort + ports: + - name: http-4001 + nodePort: 30950 + port: 4001 + - name: http-4003 + nodePort: 30951 + port: 4003 + selector: + app: dragonfly + component: client + release: dragonfly +``` + +Create a peer service using the configuration file: + +```shell +kubectl apply -f peer-service-config.yaml +``` + +## Install Stargz Snapshotter for containerd with Systemd {#install-stargz-Snapshotter-for-containerd-withs-ystemd} + +For detailed stargz installation documentation based on containerd environment, please refer to +[stargz-setup-for-containerd-environment](https://github.com/containerd/stargz-snapshotter/blob/main/docs/INSTALL.md). +The example uses Systemd to manage the `stargz-snapshotter` service. 
+ +### From the Binary Releases {#from-the-binary-releases} + +Download `containerd-stargz-grpc` binary, please refer to [stargz-snapshotter/releases](https://github.com/containerd/stargz-snapshotter/releases/latest): + +> Notice: `stargz-snapshotter_version` is recommended to use the latest version. + +```shell +STARGZ_SNAPSHOTTER_VERSION= +wget -O stargz-snapshotter-linux-arm64.tgz https://github.com/containerd/stargz-snapshotter/releases/download/v$STARGZ_SNAPSHOTTER_VERSION/stargz-snapshotter-v$STARGZ_SNAPSHOTTER_VERSION-linux-arm64.tar.gz +``` + +Untar the package: + +```shell +# Install containerd-stargz-grpc and ctr-remote tools to /usr/local/bin. +tar -C /usr/local/bin -xvf stargz-snapshotter-linux-arm64.tgz containerd-stargz-grpc ctr-remote +``` + +### Install Stargz Snapshotter plugin for containerd {#install-stargz-snapshotter-plugin-for-containerd} + +Modify your `config.toml` (default location: `/etc/containerd/config.toml`), refer to [configure-and-start-containerd](https://github.com/containerd/stargz-snapshotter/blob/main/docs/INSTALL.md#install-stargz-snapshotter-for-containerd-with-systemd). + +```toml +[plugins."io.containerd.grpc.v1.cri".containerd] + snapshotter = "stargz" + disable_snapshot_annotations = false + +[proxy_plugins] + [proxy_plugins.stargz] + type = "snapshot" + address = "/run/containerd-stargz-grpc/containerd-stargz-grpc.sock" +``` + +Restart containerd: + +```shell +sudo systemctl restart containerd +``` + +Check that containerd uses the `stargz-snapshotter` plugin: + +```shell +$ ctr -a /run/containerd/containerd.sock plugin ls | grep stargz +io.containerd.snapshotter.v1 stargz - ok +``` + +### Systemd starts Stargz Snapshotter {#systemd-stargz-snapshotter} + +Create the Stargz configuration file `config.toml`. +Please refer to the +[Stargz Mirror](https://github.com/containerd/stargz-snapshotter/blob/main/docs/overview.md#registry-mirrors-and-insecure-connection) +documentation for details. 
+ +Set the `host` address in the configuration file to your actual address. Configuration content is as follows: + +```toml +[[resolver.host."docker.io".mirrors]] + host = "http://127.0.0.1:4001" + insecure = true + [resolver.host."docker.io".mirrors.header] + X-Dragonfly-Registry = ["https://index.docker.io"] +``` + +Copy configuration file to `/etc/containerd-stargz-grpc/config.toml`: + +```shell +sudo mkdir /etc/containerd-stargz-grpc && cp config.toml /etc/containerd-stargz-grpc/config.toml +``` + +Download systemd configuration file `stargz-snapshotter.service` of stargz snapshotter, configuration content is as follows: + +```shell +wget -O /etc/systemd/system/stargz-snapshotter.service https://raw.githubusercontent.com/containerd/stargz-snapshotter/main/script/config/etc/systemd/system/stargz-snapshotter.service +systemctl enable --now stargz-snapshotter +systemctl restart containerd +``` + +### Convert an image to Stargz image {#convert-an-image-to-stargz-image} + +Convert `alpine:3.19` image to Stargz image. 
+ +Login to Dockerhub: + +```shell +docker login +``` + +Convert `alpine:3.19` image to Stargz image, and `DOCKERHUB_REPO_NAME` environment variable +needs to be set to the user's image repository: + +```shell +DOCKERHUB_REPO_NAME= +sudo nerdctl pull alpine:3.19 +sudo nerdctl image convert --estargz --oci alpine:3.19 $DOCKERHUB_REPO_NAME/alpine:3.19-esgz +sudo nerdctl image push $DOCKERHUB_REPO_NAME/alpine:3.19-esgz +``` + +### Stargz downloads images through Dragonfly {#stargz-downloads-images-through-dragonfly} + +Running `alpine:3.19-esgz` with nerdctl: + +```shell +sudo nerdctl --snapshotter stargz run --rm -it $DOCKERHUB_REPO_NAME/alpine:3.19-esgz +``` + +#### Verify {#verify} + +Check that Stargz is downloaded via Dragonfly based on mirror mode: + + + +```shell +$ journalctl -u stargz-snapshotter | grep 'prepared remote snapshot' +containerd-stargz-grpc[4049]: {"key":"default/73/extract-656625708-vmlX sha256:7c7f00c83139c0b82eae3452058c975fb5a086d1c7d9124c77dd7a66d499dc6a","level":"debug","msg":"prepared remote snapshot","parent":"default/72/sha256:413f24977d4a9ef3a4582e041dbf50a3d32f5f60d97c98225eb492883d9c4c75","remote-snapshot-prepared":"true","time":"2024-05-30T14:36:55.660116292Z"} +``` + +You can execute the following command to check if the `alpine:3.19` image is distributed via Dragonfly. + +```shell +# Find pod name. +export POD_NAME=$(kubectl get pods --namespace dragonfly-system -l "app=dragonfly,release=dragonfly,component=client" -o=jsonpath='{.items[?(@.spec.nodeName=="kind-worker")].metadata.name}' | head -n 1 ) + +# Find task id. +export TASK_ID=$(kubectl -n dragonfly-system exec ${POD_NAME} -- sh -c "grep -hoP 'alpine.*task_id=\"\K[^\"]+' /var/log/dragonfly/dfdaemon/* | head -n 1") + +# Check logs. 
+kubectl -n dragonfly-system exec -it ${POD_NAME} -- sh -c "grep ${TASK_ID} /var/log/dragonfly/dfdaemon/* | grep 'download task succeeded'" +``` + +The expected output is as follows: + +```shell +{ + 2024-04-19T02:44:09.259458Z "INFO" + "download_task":"dragonfly-client/src/grpc/dfdaemon_download.rs:276":: "download task succeeded" + "host_id": "172.18.0.3-kind-worker", + "task_id": "a46de92fcb9430049cf9e61e267e1c3c9db1f1aa4a8680a048949b06adb625a5", + "peer_id": "172.18.0.3-kind-worker-86e48d67-1653-4571-bf01-7e0c9a0a119d" +} +``` + + + +## Performance testing {#performance-testing} + +Test the performance of single-machine image download after the integration of +`Stargz Mirror` mode and `Dragonfly P2P`. +Test running version commands using images in different languages. +For example, the startup command used to run a `python` image is `python -V`. +The tests were performed on the same machine. +Due to the influence of the network environment of the machine itself, +the actual download time is not important, but the ratio of the increase in +the download time in different scenarios is very important. + +![stargz-mirror-dragonfly](../../../resource/operations/integrations/stargz-mirror-dragonfly.png) + +- OCIv1: Use containerd to pull image directly. +- Stargz Cold Boot: Use containerd to pull image via stargz-snapshotter and doesn't hit any cache. +- Stargz & Dragonfly Cold Boot: Use containerd to pull image via stargz-snapshotter. + Transfer the traffic to Dragonfly P2P based on Stargz mirror mode and no cache hits. + +Test results show `Stargz Mirror` mode and `Dragonfly P2P` integration. +Use the `Stargz` download image to compare the `OCIv1` mode, +It can effectively reduce the image download time. +The cold boot of `Stargz` and `Stargz & Dragonfly` are basically close. +The most important thing is that if a very large `kubernetes` cluster uses `Stargz` to pull images. +The download of each image layer will be generate as many range requests as needed. 
+The `QPS` to the source registry becomes too high,
+which causes the load on the registry to be relatively high.
+Dragonfly can effectively reduce the number of requests and
+download traffic for back-to-source registry.
+In the best case, `Dragonfly` can make the same task back-to-source download only once.
diff --git a/versioned_docs/version-v2.2.0/operations/integrations/git-lfs.md b/versioned_docs/version-v2.2.0/operations/integrations/git-lfs.md
new file mode 100644
index 00000000..26576634
--- /dev/null
+++ b/versioned_docs/version-v2.2.0/operations/integrations/git-lfs.md
@@ -0,0 +1,365 @@
+---
+id: git-lfs
+title: Git LFS
+slug: /operations/integrations/git-lfs/
+---
+
+## What is Git LFS?
+
+[Git LFS (Large File Storage)](https://git-lfs.com/) is an open-source extension for Git that enables users to
+handle large files more efficiently in Git repositories. Git is a version control system designed
+primarily for text files such as source code, and it can become less efficient when dealing with large binary files like
+audio, videos, datasets, graphics and other large assets. These files can significantly increase
+the size of a repository and make cloning and fetching operations slow.
+
+![git-lfs](../../resource/operations/integrations/git-lfs.png)
+
+Git LFS addresses this issue by storing these large files on a separate server and replacing them in
+the Git repository with small placeholder files (pointers). When a user clones or pulls from the repository,
+Git LFS fetches the large files from the LFS server as needed rather than downloading all the
+large files with the initial clone of the repository. For specifications,
+please refer to the [Git LFS Specification](https://github.com/git-lfs/git-lfs/blob/main/docs/spec.md).
+The server is implemented based on the HTTP protocol, refer to [Git LFS API](https://github.com/git-lfs/git-lfs/tree/main/docs/api).
+Usually Git LFS's content storage uses object storage to store large files.
+ +### Git LFS Usage + +#### Git LFS manages large files + +Github and GitLab usually manage large files based on Git LFS. + +- GitHub uses Git LFS refer to [About Git Large File Storage](https://docs.github.com/en/repositories/working-with-files/managing-large-files/about-git-large-file-storage). +- GitLab uses Git LFS refer to [Git Large File Storage](https://docs.gitlab.com/ee/topics/git/lfs/). + +#### Git LFS manages AI models and AI datasets + +Large files of models and datasets in AI are usually managed based on Git LFS. +[Hugging Face Hub](https://huggingface.co/) and [ModelScope Hub](https://modelscope.cn/) +manage models and datasets based on Git LFS. + +- Hugging Face Hub uses Git LFS refer to [Getting Started with Repositories](https://huggingface.co/docs/hub/repositories-getting-started). +- ModelScope Hub uses Git LFS refer to [Getting Started with ModelScope](https://modelscope.cn/docs/ModelScope%20Hub%E4%BD%BF%E7%94%A8%E6%96%87%E6%A1%A3). + +Hugging Face Hub's Python Library implements Git LFS to download models and datasets. +Hugging Face Hub's Python Library distributes models and datasets to accelerate, +refer to [Hugging Face accelerates distribution of models and datasets based on Dragonfly](https://www.cncf.io/blog/2023/11/16/hugging-face-accelerates-distribution-of-models-and-datasets-based-on-dragonfly/). + +## Dragonfly eliminates the bandwidth limit of Git LFS's content storage + +This document will help you experience how to use dragonfly with Git LFS. During the downloading of large files, +the file size is large and there are many services downloading the larges files at the same time. +The bandwidth of the storage will reach the limit and the download will be slow. + +![git-lfs-download](../../resource/operations/integrations/git-lfs-download.png) + +Dragonfly can be used to eliminate the bandwidth limit of the storage through P2P technology, +thereby accelerating large files downloading. 
+ +![git-lfs-p2p](../../resource/operations/integrations/git-lfs-p2p.png) + +## Dragonfly accelerates downloads with Git LFS + +By proxying the HTTP protocol file download request of Git LFS to Dragonfly Peer Proxy, +the file download traffic is forwarded to the P2P network. The following documentation is based on GitHub LFS. + +### Get the Content Storage address of Git LFS + +Add `GIT_CURL_VERBOSE=1` to print verbose logs of git clone and get the address of content storage of Git LFS. + +```shell +GIT_CURL_VERBOSE=1 git clone git@github.com:{YOUR-USERNAME}/{YOUR-REPOSITORY}.git +``` + +Look for the `trace git-lfs` keyword in the logs and you can see the log of Git LFS download files. +Pay attention to the content of `actions` and `download` in the log. + + + +```text +18:52:51.137490 trace git-lfs: HTTP: {"objects":[{"oid":"68ac0af011ce9c51a4c74c5ac9a40218e9e67bf55ebe13c8f2d758f710a3163a","size":19670194,"actions":{"download":{"href":"https://github-cloud.githubusercontent.com/alambic/media/730487717/68/ac/68ac0af011ce9c51a4c74c5ac9a40218e9e67bf55ebe13c8f2d758f710a3163a?X-Amz-Algorithm=AWS4-HMAC-SHA256&X-Amz-Credential=AKIA5BA2674WPWWEFGQ5%2F20240605%2Fus-east-1%2Fs3%2Faws4_request&X-Amz-Date=20240605T105251Z&X-Amz-Expires=3600&X-Amz-Signature=35cf8e02f0d3e2da893aa46fa4929d79ce1abb18aea8e0fabfbb138706d7151818:52:51.137574 trace git-lfs: HTTP: &X-Amz-SignedHeaders=host&actor_id=0&key_id=0&repo_id=810214636&token=1","expires_at":"2024-06-05T11:52:51Z","expires_in":3600}}}]} +``` + + + +The download URL can be found in `actions.download.href` in the `objects`. +You can find that the content storage of GitHub LFS is actually stored at `github-cloud.githubusercontent.com`. + +**Information about Git LFS :** + +The content storage address of Git LFS is `github-cloud.githubusercontent.com`. 
+ +### Installation + +#### Prerequisites + +| Name | Version | Document | +| ------------------ | ------- | --------------------------------------- | +| Kubernetes cluster | 1.20+ | [kubernetes.io](https://kubernetes.io/) | +| Helm | 3.8.0+ | [helm.sh](https://helm.sh/) | +| Git LFS | 3.3.0+ | [git-lfs](https://git-lfs.com/) | + +#### Dragonfly Kubernetes Cluster Setup {#dragonfly-kubernetes-cluster-setup} + +For detailed installation documentation based on kubernetes cluster, please refer to [quick-start-kubernetes](../../getting-started/quick-start/kubernetes.md). + +##### Setup kubernetes cluster + +[Kind](https://kind.sigs.k8s.io/) is recommended if no kubernetes cluster is available for testing. + +Create kind multi-node cluster configuration file kind-config.yaml, configuration content is as follows: + +```yaml +kind: Cluster +apiVersion: kind.x-k8s.io/v1alpha4 +nodes: + - role: control-plane + - role: worker + extraPortMappings: + - containerPort: 30950 + hostPort: 4001 + - role: worker +``` + +Create a kind multi-node cluster using the configuration file: + +```shell +kind create cluster --config kind-config.yaml +``` + +Switch the context of kubectl to kind cluster: + +```shell +kubectl config use-context kind-kind +``` + +##### Kind loads Dragonfly image + +Pull Dragonfly latest images: + +```shell +docker pull dragonflyoss/scheduler:latest +docker pull dragonflyoss/manager:latest +docker pull dragonflyoss/client:latest +``` + +Kind cluster loads Dragonfly latest images: + +```shell +kind load docker-image dragonflyoss/scheduler:latest +kind load docker-image dragonflyoss/manager:latest +kind load docker-image dragonflyoss/client:latest +``` + +##### Create Dragonfly cluster based on helm charts + +Create helm charts configuration file charts-config.yaml. +Add the `github-cloud.githubusercontent.com` rule to `client.config.proxy.rules.regex` +to forward the HTTP file download of content storage of Git LFS to the P2P network. 
+ +```yaml +manager: + image: + repository: dragonflyoss/manager + tag: latest + metrics: + enable: true + config: + verbose: true + pprofPort: 18066 + +scheduler: + image: + repository: dragonflyoss/scheduler + tag: latest + metrics: + enable: true + config: + verbose: true + pprofPort: 18066 + +seedClient: + image: + repository: dragonflyoss/client + tag: latest + metrics: + enable: true + config: + verbose: true + +client: + image: + repository: dragonflyoss/client + tag: latest + hostNetwork: true + metrics: + enable: true + config: + verbose: true + proxy: + server: + port: 4001 + registryMirror: + addr: https://index.docker.io + rules: + - regex: 'blobs/sha256.*' + - regex: 'github-cloud.githubusercontent.com.*' +``` + +Create a Dragonfly cluster using the configuration file: + + + +```shell +$ helm repo add dragonfly https://dragonflyoss.github.io/helm-charts/ +$ helm install --wait --create-namespace --namespace dragonfly-system dragonfly dragonfly/dragonfly -f charts-config.yaml +NAME: dragonfly +LAST DEPLOYED: Mon June 5 12:53:14 2024 +NAMESPACE: dragonfly-system +STATUS: deployed +REVISION: 1 +TEST SUITE: None +NOTES: +1. Get the scheduler address by running these commands: + export SCHEDULER_POD_NAME=$(kubectl get pods --namespace dragonfly-system -l "app=dragonfly,release=dragonfly,component=scheduler" -o jsonpath={.items[0].metadata.name}) + export SCHEDULER_CONTAINER_PORT=$(kubectl get pod --namespace dragonfly-system $SCHEDULER_POD_NAME -o jsonpath="{.spec.containers[0].ports[0].containerPort}") + kubectl --namespace dragonfly-system port-forward $SCHEDULER_POD_NAME 8002:$SCHEDULER_CONTAINER_PORT + echo "Visit http://127.0.0.1:8002 to use your scheduler" + +2. 
Get the dfdaemon port by running these commands: + export DFDAEMON_POD_NAME=$(kubectl get pods --namespace dragonfly-system -l "app=dragonfly,release=dragonfly,component=dfdaemon" -o jsonpath={.items[0].metadata.name}) + export DFDAEMON_CONTAINER_PORT=$(kubectl get pod --namespace dragonfly-system $DFDAEMON_POD_NAME -o jsonpath="{.spec.containers[0].ports[0].containerPort}") + You can use $DFDAEMON_CONTAINER_PORT as a proxy port in Node. + +3. Configure runtime to use dragonfly: + https://d7y.io/docs/getting-started/quick-start/kubernetes/ + + +4. Get Jaeger query URL by running these commands: + export JAEGER_QUERY_PORT=$(kubectl --namespace dragonfly-system get services dragonfly-jaeger-query -o jsonpath="{.spec.ports[0].port}") + kubectl --namespace dragonfly-system port-forward service/dragonfly-jaeger-query 16686:$JAEGER_QUERY_PORT + echo "Visit http://127.0.0.1:16686/search?limit=20&lookback=1h&maxDuration&minDuration&service=dragonfly to query download events" +``` + + + +Check that Dragonfly is deployed successfully: + +```shell +$ kubectl get po -n dragonfly-system +NAME READY STATUS RESTARTS AGE +dragonfly-client-6jgzn 1/1 Running 0 34m +dragonfly-client-qzcz9 1/1 Running 0 34m +dragonfly-manager-6bc4454d94-ldsk7 1/1 Running 0 34m +dragonfly-mysql-0 1/1 Running 0 34m +dragonfly-redis-master-0 1/1 Running 0 34m +dragonfly-redis-replicas-0 1/1 Running 0 34m +dragonfly-redis-replicas-1 1/1 Running 0 34m +dragonfly-redis-replicas-2 1/1 Running 0 34m +dragonfly-scheduler-0 1/1 Running 0 34m +dragonfly-scheduler-1 1/1 Running 0 34m +dragonfly-scheduler-2 1/1 Running 0 34m +dragonfly-seed-client-0 1/1 Running 0 34m +dragonfly-seed-client-1 1/1 Running 0 34m +dragonfly-seed-client-2 1/1 Running 0 34m +``` + +Create peer service configuration file peer-service-config.yaml, configuration content is as follows: + +```yaml +apiVersion: v1 +kind: Service +metadata: + name: peer + namespace: dragonfly-system +spec: + type: NodePort + ports: + - name: http-4001 + 
nodePort: 30950
+ port: 4001
+ selector:
+ app: dragonfly
+ component: client
+ release: dragonfly
+```
+
+Create a peer service using the configuration file:
+
+```shell
+kubectl apply -f peer-service-config.yaml
+```
+
+### Git LFS downloads large files via Dragonfly
+
+Proxy Git LFS download requests to Dragonfly Peer Proxy
+through Git configuration. The Git configuration to set includes the
+`http.proxy`, `lfs.transfer.enablehrefrewrite` and `url.http://github-cloud.githubusercontent.com/.insteadOf` properties.
+
+> Notice: Replace the `http.proxy` address with your actual address.
+
+```shell
+git config --global http.proxy http://127.0.0.1:4001
+git config --global lfs.transfer.enablehrefrewrite true
+git config --global url.http://github-cloud.githubusercontent.com/.insteadOf https://github-cloud.githubusercontent.com/
+```
+
+Forward Git LFS download requests to the P2P network via Dragonfly Peer Proxy and Git clone the large files.
+
+```shell
+git clone git@github.com:{YOUR-USERNAME}/{YOUR-REPOSITORY}.git
+```
+
+Skip GIT SSL verification.
+
+```shell
+GIT_SSL_NO_VERIFY=1 git clone git@github.com:{YOUR-USERNAME}/{YOUR-REPOSITORY}.git
+```
+
+### Verify
+
+Execute the command:
+
+```shell
+# Find pod name.
+export POD_NAME=$(kubectl get pods --namespace dragonfly-system -l "app=dragonfly,release=dragonfly,
+component=client" -o=jsonpath='{.items[?(@.spec.nodeName=="kind-worker")].metadata.name}' | head -n 1 )
+
+# Check logs.
+kubectl -n dragonfly-system exec -it ${POD_NAME} -- grep "download task succeeded" /var/log/dragonfly/dfdaemon/* +``` + +The expected output is as follows: + + + +```shell +{ + 2024-04-19T02:44:09.259458Z "INFO" + "download_task":"dragonfly-client/src/grpc/dfdaemon_download.rs:276":: "download task succeeded" + "host_id": "172.18.0.3-kind-worker", + "task_id": "a46de92fcb9430049cf9e61e267e1c3c9db1f1aa4a8680a048949b06adb625a5", + "peer_id": "172.18.0.3-kind-worker-86e48d67-1653-4571-bf01-7e0c9a0a119d" +} +``` + + + +## Performance testing + +Test the performance of single-machine large files download after the integration of Git LFS and Dragonfly P2P. +Due to the influence of the network environment of the machine itself, the actual download time is not important, +but the ratio of the increase in the download time in different scenarios is very important. + +![git-lfs-dragonfly](../../resource/operations/integrations/git-lfs-dragonfly.png) + +- Git LFS: Use Git LFS to download large files directly. +- Git LFS & Dragonfly Cold Boot: Use Git LFS to download large files via Dragonfly P2P network and no cache hits. +- Hit Dragonfly Remote Peer Cache: Use Git LFS to download large files via + Dragonfly P2P network and hit the remote peer cache. +- Hit Dragonfly Remote Local Cache: Use Git LFS to download large files via + Dragonfly P2P network and hit the local peer cache. + +Test results show Git LFS and Dragonfly P2P integration. It can effectively reduce the file download time. +Note that this test was a single-machine test, which means that in the case of cache hits, +the performance limitation is on the disk. If Dragonfly is deployed on multiple machines for P2P download, +the large files download speed will be faster. 
diff --git a/versioned_docs/version-v2.2.0/operations/integrations/harbor.md b/versioned_docs/version-v2.2.0/operations/integrations/harbor.md new file mode 100644 index 00000000..be8e84d1 --- /dev/null +++ b/versioned_docs/version-v2.2.0/operations/integrations/harbor.md @@ -0,0 +1,12 @@ +--- +id: harbor +title: Harbor +slug: /operations/integrations/harbor/ +--- + +This document will help you experience how to use dragonfly with harbor. + +## P2P Preheat {#p2p-preheat} + +Dragonfly 2.0 is compatible with dragonfly 1.0 integrated harbor preheat interface. +Harbor preheat feature integrates dragonfly, please refer to [harbor-preheat](../../advanced-guides/preheat.md#harbor). diff --git a/versioned_docs/version-v2.2.0/operations/integrations/hugging-face.md b/versioned_docs/version-v2.2.0/operations/integrations/hugging-face.md new file mode 100644 index 00000000..8bcd17d8 --- /dev/null +++ b/versioned_docs/version-v2.2.0/operations/integrations/hugging-face.md @@ -0,0 +1,431 @@ +--- +id: hugging-face +title: Hugging Face +slug: /operations/integrations/hugging-face/ +--- + +This document will help you experience how to use dragonfly with hugging face. +During the downloading of datasets or models, the file size is large and +there are many services downloading the files at the same time. +The bandwidth of the storage will reach the limit and the download will be slow. + +![hugging-face-download](../../resource/operations/integrations/hugging-face-download.png) + +Dragonfly can be used to eliminate the bandwidth limit of the storage through P2P technology, thereby accelerating file downloading. 
+ +![hugging-face-p2p](../../resource/operations/integrations/hugging-face-p2p.png) + +## Prerequisites {#prerequisites} + + + +| Name | Version | Document | +| ------------------ | ------- | --------------------------------------- | +| Kubernetes cluster | 1.20+ | [kubernetes.io](https://kubernetes.io/) | +| Helm | 3.8.0+ | [helm.sh](https://helm.sh/) | +| Python | 3.8.0+ | [python.org](https://www.python.org/) | + + + +## Dragonfly Kubernetes Cluster Setup {#dragonfly-kubernetes-cluster-setup} + +For detailed installation documentation based on kubernetes cluster, please refer to [quick-start-kubernetes](../../getting-started/quick-start/kubernetes.md). + +### Setup kubernetes cluster {#setup-kubernetes-cluster} + +[Kind](https://kind.sigs.k8s.io/) is recommended if no Kubernetes cluster is available for testing. + +Create kind multi-node cluster configuration file `kind-config.yaml`, configuration content is as follows: + +```yaml +kind: Cluster +apiVersion: kind.x-k8s.io/v1alpha4 +nodes: + - role: control-plane + - role: worker + extraPortMappings: + - containerPort: 30950 + hostPort: 4001 + - role: worker +``` + +Create a kind multi-node cluster using the configuration file: + +```shell +kind create cluster --config kind-config.yaml +``` + +Switch the context of kubectl to kind cluster: + +```shell +kubectl config use-context kind-kind +``` + +### Kind loads Dragonfly image {#kind-loads-dragonfly-image} + +Pull Dragonfly latest images: + +```shell +docker pull dragonflyoss/scheduler:latest +docker pull dragonflyoss/manager:latest +docker pull dragonflyoss/client:latest +``` + +Kind cluster loads Dragonfly latest images: + +```shell +kind load docker-image dragonflyoss/scheduler:latest +kind load docker-image dragonflyoss/manager:latest +kind load docker-image dragonflyoss/client:latest +``` + +### Create Dragonfly cluster based on helm charts {#create-dragonfly-cluster-based-on-helm-charts} + +Create helm charts configuration file `charts-config.yaml` and set 
`client.config.proxy.registryMirror.addr` to +the address of the Hugging Face Hub's LFS server, configuration content is as follows: + +```yaml +manager: + image: + repository: dragonflyoss/manager + tag: latest + metrics: + enable: true + config: + verbose: true + pprofPort: 18066 + +scheduler: + image: + repository: dragonflyoss/scheduler + tag: latest + metrics: + enable: true + config: + verbose: true + pprofPort: 18066 + +seedClient: + image: + repository: dragonflyoss/client + tag: latest + metrics: + enable: true + config: + verbose: true + +client: + image: + repository: dragonflyoss/client + tag: latest + hostNetwork: true + metrics: + enable: true + config: + verbose: true + proxy: + server: + port: 4001 + registryMirror: + addr: https://cdn-lfs.huggingface.co + rules: + - regex: "repos.*" + useTLS: true +``` + +Create a Dragonfly cluster using the configuration file: + + + +```shell +$ helm repo add dragonfly https://dragonflyoss.github.io/helm-charts/ +$ helm install --wait --create-namespace --namespace dragonfly-system dragonfly dragonfly/dragonfly -f charts-config.yaml +NAME: dragonfly +LAST DEPLOYED: Mon Jun 3 16:32:28 2024 +NAMESPACE: dragonfly-system +STATUS: deployed +REVISION: 1 +TEST SUITE: None +NOTES: +1. Get the scheduler address by running these commands: + export SCHEDULER_POD_NAME=$(kubectl get pods --namespace dragonfly-system -l "app=dragonfly,release=dragonfly,component=scheduler" -o jsonpath={.items[0].metadata.name}) + export SCHEDULER_CONTAINER_PORT=$(kubectl get pod --namespace dragonfly-system $SCHEDULER_POD_NAME -o jsonpath="{.spec.containers[0].ports[0].containerPort}") + kubectl --namespace dragonfly-system port-forward $SCHEDULER_POD_NAME 8002:$SCHEDULER_CONTAINER_PORT + echo "Visit http://127.0.0.1:8002 to use your scheduler" + +2. 
Get the dfdaemon port by running these commands:
+  export DFDAEMON_POD_NAME=$(kubectl get pods --namespace dragonfly-system -l "app=dragonfly,release=dragonfly,component=dfdaemon" -o jsonpath={.items[0].metadata.name})
+  export DFDAEMON_CONTAINER_PORT=$(kubectl get pod --namespace dragonfly-system $DFDAEMON_POD_NAME -o jsonpath="{.spec.containers[0].ports[0].containerPort}")
+  You can use $DFDAEMON_CONTAINER_PORT as a proxy port in Node.
+
+3. Configure runtime to use dragonfly:
+  https://d7y.io/docs/getting-started/quick-start/kubernetes/
+```
+
+
+
+Check that Dragonfly is deployed successfully:
+
+```shell
+$ kubectl get po -n dragonfly-system
+NAME                                 READY   STATUS    RESTARTS      AGE
+dragonfly-client-6jgzn               1/1     Running   0             21m
+dragonfly-client-qzcz9               1/1     Running   0             21m
+dragonfly-manager-6bc4454d94-ldsk7   1/1     Running   0             21m
+dragonfly-mysql-0                    1/1     Running   0             21m
+dragonfly-redis-master-0             1/1     Running   0             21m
+dragonfly-redis-replicas-0           1/1     Running   0             21m
+dragonfly-redis-replicas-1           1/1     Running   0             21m
+dragonfly-redis-replicas-2           1/1     Running   0             21m
+dragonfly-scheduler-0                1/1     Running   0             21m
+dragonfly-scheduler-1                1/1     Running   0             21m
+dragonfly-scheduler-2                1/1     Running   0             21m
+dragonfly-seed-client-0              1/1     Running   2 (21m ago)   21m
+dragonfly-seed-client-1              1/1     Running   0             21m
+dragonfly-seed-client-2              1/1     Running   0             21m
+```
+
+Create peer service configuration file `peer-service-config.yaml`, configuration content is as follows:
+
+```yaml
+apiVersion: v1
+kind: Service
+metadata:
+  name: peer
+  namespace: dragonfly-system
+spec:
+  type: NodePort
+  ports:
+    - name: http-4001
+      nodePort: 30950
+      port: 4001
+  selector:
+    app: dragonfly
+    component: client
+    release: dragonfly
+```
+
+Create a peer service using the configuration file:
+
+```shell
+kubectl apply -f peer-service-config.yaml
+```
+
+## Use Hub Python Library to download files and distribute traffic through Dragonfly {#use-hub-python-library-to-download-files-and-distribute-traffic-through-draognfly}
+
+Any API in the [Hub Python
Library](https://huggingface.co/docs/huggingface_hub/index) +that uses `Requests` library for downloading files can +distribute the download traffic in the P2P network by +setting `DragonflyAdapter` to the requests `Session`. + +### Download a single file with Dragonfly {#download-a-single-file-with-dragonfly} + +A single file can be downloaded using the [`hf_hub_download`](https://huggingface.co/docs/huggingface_hub/v0.17.1/en/package_reference/file_download#huggingface_hub.hf_hub_download), +distribute traffic through the Dragonfly peer. + +Create `hf_hub_download_dragonfly.py` file. Use `DragonflyAdapter` to forward the file download request of +the LFS protocol to Dragonfly HTTP proxy, so that it can use the P2P network +to distribute file, configuration content is as follows: + +> Notice: Replace the `session.proxies` address with your actual address. + +```python +import requests +from requests.adapters import HTTPAdapter +from urllib.parse import urlparse +from huggingface_hub import hf_hub_download +from huggingface_hub import configure_http_backend + +class DragonflyAdapter(HTTPAdapter): + def get_connection(self, url, proxies=None): + # Change the schema of the LFS request to download large files from https:// to http://, + # so that Dragonfly HTTP proxy can be used. + if url.startswith('https://cdn-lfs.huggingface.co'): + url = url.replace('https://', 'http://') + return super().get_connection(url, proxies) + + def add_headers(self, request, **kwargs): + super().add_headers(request, **kwargs) + + # If there are multiple different LFS repositories, you can override the + # default repository address by adding X-Dragonfly-Registry header. + if request.url.find('example.com') != -1: + request.headers["X-Dragonfly-Registry"] = 'https://example.com' + +# Create a factory function that returns a new Session. 
+def backend_factory() -> requests.Session:
+    session = requests.Session()
+    session.mount('http://', DragonflyAdapter())
+    session.mount('https://', DragonflyAdapter())
+    session.proxies = {'http': 'http://127.0.0.1:4001'}
+    return session
+
+# Set it as the default session factory
+configure_http_backend(backend_factory=backend_factory)
+
+hf_hub_download(repo_id="tiiuae/falcon-rw-1b", filename="pytorch_model.bin")
+```
+
+Download a single file of the LFS protocol with Dragonfly:
+
+
+
+```shell
+$ python3 hf_hub_download_dragonfly.py
+(…)YkNX13a46FCg__&Key-Pair-Id=KVTP0A1DKRTAX: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 2.62G/2.62G [00:52<00:00, 49.8MB/s]
+```
+
+
+
+#### Verify a single file download with Dragonfly {#verify-a-single-file-download-with-dragonfly}
+
+Execute the command:
+
+```shell
+# Find pod name.
+export POD_NAME=$(kubectl get pods --namespace dragonfly-system -l "app=dragonfly,release=dragonfly,
+component=client" -o=jsonpath='{.items[?(@.spec.nodeName=="kind-worker")].metadata.name}' | head -n 1 )
+
+# Check logs.
+kubectl -n dragonfly-system exec -it ${POD_NAME} -- grep "download task succeeded" /var/log/dragonfly/dfdaemon/* +``` + +The expected output is as follows: + + + +```shell + 2024-04-19T02:44:09.259458Z INFO + "download_task":"dragonfly-client/src/grpc/dfdaemon_download.rs:276":: "download task succeeded" + "host_id": "172.18.0.3-kind-worker", + "task_id": "a46de92fcb9430049cf9e61e267e1c3c9db1f1aa4a8680a048949b06adb625a5", + "peer_id": "172.18.0.3-kind-worker-86e48d67-1653-4571-bf01-7e0c9a0a119d" +``` + + + +### Download a snapshot of the repo with Dragonfly {#download-a-snapshot-of-the-repo-with-dragonfly} + +A snapshot of the repo can be downloaded using the [`snapshot_download`](https://huggingface.co/docs/huggingface_hub/v0.17.1/en/package_reference/file_download#huggingface_hub.snapshot_download), +distribute traffic through the Dragonfly peer. + +Create `snapshot_download_dragonfly.py` file. Use `DragonflyAdapter` to forward the file download request of +the LFS protocol to Dragonfly HTTP proxy, so that it can use the P2P network +to distribute file. Only the files of the LFS protocol will be distributed +through the Dragonfly P2P network. content is as follows: + +> Notice: Replace the `session.proxies` address with your actual address. + +```python +import requests +from requests.adapters import HTTPAdapter +from urllib.parse import urlparse +from huggingface_hub import snapshot_download +from huggingface_hub import configure_http_backend + +class DragonflyAdapter(HTTPAdapter): + def get_connection(self, url, proxies=None): + # Change the schema of the LFS request to download large files from https:// to http://, + # so that Dragonfly HTTP proxy can be used. 
+ if url.startswith('https://cdn-lfs.huggingface.co'): + url = url.replace('https://', 'http://') + return super().get_connection(url, proxies) + + def add_headers(self, request, **kwargs): + super().add_headers(request, **kwargs) + + # If there are multiple different LFS repositories, you can override the + # default repository address by adding X-Dragonfly-Registry header. + if request.url.find('example.com') != -1: + request.headers["X-Dragonfly-Registry"] = 'https://example.com' + +# Create a factory function that returns a new Session. +def backend_factory() -> requests.Session: + session = requests.Session() + session.mount('http://', DragonflyAdapter()) + session.mount('https://', DragonflyAdapter()) + session.proxies = {'http': 'http://127.0.0.1:4001'} + return session + +# Set it as the default session factory +configure_http_backend(backend_factory=backend_factory) + +snapshot_download(repo_id="tiiuae/falcon-rw-1b") +``` + +Download a snapshot of the repo with Dragonfly: + + + +```shell +$ python3 snapshot_download_dragonfly.py +(…)03165eb22f0a867d4e6a64d34fce19/README.md: 100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 7.60k/7.60k [00:00<00:00, 374kB/s] +(…)7d4e6a64d34fce19/configuration_falcon.py: 100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 6.70k/6.70k [00:00<00:00, 762kB/s] +(…)f0a867d4e6a64d34fce19/modeling_falcon.py: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 56.9k/56.9k [00:00<00:00, 5.35MB/s] +(…)3165eb22f0a867d4e6a64d34fce19/merges.txt: 
100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 456k/456k [00:00<00:00, 9.07MB/s] +(…)867d4e6a64d34fce19/tokenizer_config.json: 100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 234/234 [00:00<00:00, 106kB/s] +(…)eb22f0a867d4e6a64d34fce19/tokenizer.json: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 2.11M/2.11M [00:00<00:00, 27.7MB/s] +(…)3165eb22f0a867d4e6a64d34fce19/vocab.json: 100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 798k/798k [00:00<00:00, 19.7MB/s] +(…)7d4e6a64d34fce19/special_tokens_map.json: 100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 99.0/99.0 [00:00<00:00, 45.3kB/s] +(…)67d4e6a64d34fce19/generation_config.json: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 115/115 [00:00<00:00, 5.02kB/s] +(…)165eb22f0a867d4e6a64d34fce19/config.json: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 1.05k/1.05k [00:00<00:00, 75.9kB/s] +(…)eb22f0a867d4e6a64d34fce19/.gitattributes: 100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 1.48k/1.48k 
[00:00<00:00, 171kB/s] +(…)t-oSSW23tawg__&Key-Pair-Id=KVTP0A1DKRTAX: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 2.62G/2.62G [00:50<00:00, 52.1MB/s] +Fetching 12 files: 100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 12/12 [00:50<00:00, 4.23s/it] +``` + + + +#### Verify a snapshot of the repo download with Dragonfly {#verify-a-snapshot-of-the-repo-download-with-dragonfly} + +Execute the command: + +```shell +# Find pod name. +export POD_NAME=$(kubectl get pods --namespace dragonfly-system -l "app=dragonfly,release=dragonfly, +component=client" -o=jsonpath='{.items[?(@.spec.nodeName=="kind-worker")].metadata.name}' | head -n 1 ) + +# Check logs. +kubectl -n dragonfly-system exec -it ${POD_NAME} -- grep "download task succeeded" /var/log/dragonfly/dfdaemon/* +``` + +The expected output is as follows: + + + +```shell +{ + 2024-04-19T02:44:09.259458Z "INFO" + "download_task":"dragonfly-client/src/grpc/dfdaemon_download.rs:276":: "download task succeeded" + "host_id": "172.18.0.3-kind-worker", + "task_id": "a46de92fcb9430049cf9e61e267e1c3c9db1f1aa4a8680a048949b06adb625a5", + "peer_id": "172.18.0.3-kind-worker-86e48d67-1653-4571-bf01-7e0c9a0a119d" +} +``` + + + +## Performance testing {#performance-testing} + +Test the performance of single-machine file download by `hf_hub_download` API after the integration of +Hugging Face Python Library and Dragonfly P2P. +Due to the influence of the network environment of the machine itself, the actual download time is not important, +but the ratio of the increase in the download time in different scenarios is very important. 
+ +![hugging-face-dragonfly](../../resource/operations/integrations/hugging-face-dragonfly.png) + + + +- Hugging Face Python Library: Use `hf_hub_download` API to download models directly. +- Hugging Face Python Library & Dragonfly Cold Boot: Use `hf_hub_download` API to download models via Dragonfly P2P network and no cache hits. +- Hit Dragonfly Remote Peer Cache: Use `hf_hub_download` API to download models via Dragonfly P2P network and hit the remote peer cache. +- Hit Dragonfly Local Peer Cache: Use `hf_hub_download` API to download models via Dragonfly P2P network and hit the local peer cache. +- Hit Hugging Face Cache: Use `hf_hub_download` API to download models via Dragonfly P2P network and hit the Hugging Face local cache. + + + +Test results show Hugging Face Python Library and Dragonfly P2P integration. +It can effectively reduce the file download time. +Note that this test was a single-machine test, which means that in the case of cache hits, +the performance limitation is on the disk. +If Dragonfly is deployed on multiple machines for P2P download, the models download speed will be faster. diff --git a/versioned_docs/version-v2.2.0/operations/integrations/torchserve.md b/versioned_docs/version-v2.2.0/operations/integrations/torchserve.md new file mode 100644 index 00000000..78541f0c --- /dev/null +++ b/versioned_docs/version-v2.2.0/operations/integrations/torchserve.md @@ -0,0 +1,730 @@ +--- +id: torchserve +title: TorchServe +slug: /operations/integrations/torchserve/ +--- + +This document will help you experience how to use Dragonfly with [TorchServe](https://github.com/pytorch/serve). +During the downloading of models, the file size is large and there are many services downloading the files at the same time. +The bandwidth of the storage will reach the limit and the download will be slow. 
+ +![torchserve-download](../../resource/operations/integrations/torchserve-download.png) + +Dragonfly can be used to eliminate the bandwidth limit of the storage through P2P technology, thereby accelerating file downloading. + +![torchserve-p2p](../../resource/operations/integrations/torchserve-p2p.png) + +## Architecture + +![torchserve-dragonfly-integration](../../resource/operations/integrations/torchserve-dragonfly-integration.png) + +[Dragonfly Endpoint](https://github.com/dragonflyoss/dragonfly-endpoint) plugin forwards TorchServe download +model requests to the Dragonfly P2P network. + +![torchserve-dragonfly-architecture](../../resource/operations/integrations/torchserve-dragonfly-architecture.png) + +The models download steps: + +1. TorchServe sends a model download request and the request is forwarded to the Dragonfly Peer. +2. The Dragonfly Peer registers tasks with the Dragonfly Scheduler. +3. Return the candidate parents to Dragonfly Peer. +4. Dragonfly Peer downloads model from candidate parents. +5. After downloading the model, TorchServe will register the model. + +## Installation + +By integrating Dragonfly Endpoint into TorchServe, download traffic through Dragonfly to pull models stored in +S3, OSS, GCS, and ABS, and register models in TorchServe. +The Dragonfly Endpoint plugin is in the [dragonfly-endpoint](https://github.com/dragonflyoss/dragonfly-endpoint) repository. + +### Prerequisites + +| Name | Version | document | +| ------------------ | ------- | ------------------------------------------------ | +| Kubernetes cluster | 1.20+ | [kubernetes.io](https://kubernetes.io/) | +| Helm | 3.8.0+ | [helm.sh](https://helm.sh/) | +| TorchServe | 0.4.0+ | [pytorch.org/serve/](https://pytorch.org/serve/) | + +### Dragonfly Kubernetes Cluster Setup + +For detailed installation documentation, please refer to [quick-start-kubernetes](../../getting-started/quick-start/kubernetes.md). 
+ +#### Prepare Kubernetes Cluster + +[Kind](https://kind.sigs.k8s.io/) is recommended if no Kubernetes cluster is available for testing. + +Create kind multi-node cluster configuration file `kind-config.yaml`, configuration content is as follows: + +```yaml +kind: Cluster +apiVersion: kind.x-k8s.io/v1alpha4 +nodes: + - role: control-plane + - role: worker + - role: worker +``` + +Create a kind multi-node cluster using the configuration file: + +```shell +kind create cluster --config kind-config.yaml +``` + +Switch the context of kubectl to kind cluster: + +```shell +kubectl config use-context kind-kind +``` + +#### Kind loads Dragonfly image + +Pull Dragonfly latest images: + +```shell +docker pull dragonflyoss/scheduler:latest +docker pull dragonflyoss/manager:latest +docker pull dragonflyoss/client:latest +``` + +Kind cluster loads Dragonfly latest images: + +```shell +kind load docker-image dragonflyoss/scheduler:latest +kind load docker-image dragonflyoss/manager:latest +kind load docker-image dragonflyoss/client:latest +``` + +#### Create Dragonfly cluster based on helm charts + +Create helm charts configuration file `charts-config.yaml` and set `client.config.proxy.rules.regex` to +match the download path of the object storage, configuration content is as follows: + +```yaml +manager: + image: + repository: dragonflyoss/manager + tag: latest + metrics: + enable: true + config: + verbose: true + pprofPort: 18066 + +scheduler: + image: + repository: dragonflyoss/scheduler + tag: latest + metrics: + enable: true + config: + verbose: true + pprofPort: 18066 + +seedClient: + image: + repository: dragonflyoss/client + tag: latest + metrics: + enable: true + config: + verbose: true + +client: + image: + repository: dragonflyoss/client + tag: latest + metrics: + enable: true + config: + verbose: true + proxy: + server: + port: 4001 + registryMirror: + addr: https://index.docker.io + rules: + - regex: 'blobs/sha256.*' + - regex: '.*amazonaws.*' +``` + +Create a 
Dragonfly cluster using the configuration file: + + + +```shell +$ helm repo add dragonfly https://dragonflyoss.github.io/helm-charts/ +$ helm install --wait --create-namespace --namespace dragonfly-system dragonfly dragonfly/dragonfly -f charts-config.yaml +LAST DEPLOYED: Mon June 5 16:53:14 2024 +NAMESPACE: dragonfly-system +STATUS: deployed +REVISION: 1 +TEST SUITE: None +NOTES: +1. Get the scheduler address by running these commands: + export SCHEDULER_POD_NAME=$(kubectl get pods --namespace dragonfly-system -l "app=dragonfly,release=dragonfly,component=scheduler" -o jsonpath={.items[0].metadata.name}) + export SCHEDULER_CONTAINER_PORT=$(kubectl get pod --namespace dragonfly-system $SCHEDULER_POD_NAME -o jsonpath="{.spec.containers[0].ports[0].containerPort}") + kubectl --namespace dragonfly-system port-forward $SCHEDULER_POD_NAME 8002:$SCHEDULER_CONTAINER_PORT + echo "Visit http://127.0.0.1:8002 to use your scheduler" + +2. Get the dfdaemon port by running these commands: + export DFDAEMON_POD_NAME=$(kubectl get pods --namespace dragonfly-system -l "app=dragonfly,release=dragonfly,component=dfdaemon" -o jsonpath={.items[0].metadata.name}) + export DFDAEMON_CONTAINER_PORT=$(kubectl get pod --namespace dragonfly-system $DFDAEMON_POD_NAME -o jsonpath="{.spec.containers[0].ports[0].containerPort}") + You can use $DFDAEMON_CONTAINER_PORT as a proxy port in Node. + +3. Configure runtime to use dragonfly: + https://d7y.io/docs/getting-started/quick-start/kubernetes/ + + +4. 
Get Jaeger query URL by running these commands: + export JAEGER_QUERY_PORT=$(kubectl --namespace dragonfly-system get services dragonfly-jaeger-query -o jsonpath="{.spec.ports[0].port}") + kubectl --namespace dragonfly-system port-forward service/dragonfly-jaeger-query 16686:$JAEGER_QUERY_PORT + echo "Visit http://127.0.0.1:16686/search?limit=20&lookback=1h&maxDuration&minDuration&service=dragonfly to query download events" +``` + + + +Check that Dragonfly is deployed successfully: + +```shell +$ kubectl get po -n dragonfly-system +NAME READY STATUS RESTARTS AGE +dragonfly-client-6jgzn 1/1 Running 0 17m +dragonfly-client-qzcz9 1/1 Running 0 17m +dragonfly-manager-6bc4454d94-ldsk7 1/1 Running 0 17m +dragonfly-mysql-0 1/1 Running 0 17m +dragonfly-redis-master-0 1/1 Running 0 17m +dragonfly-redis-replicas-0 1/1 Running 0 17m +dragonfly-redis-replicas-1 1/1 Running 0 17m +dragonfly-redis-replicas-2 1/1 Running 0 17m +dragonfly-scheduler-0 1/1 Running 0 17m +dragonfly-scheduler-1 1/1 Running 0 17m +dragonfly-scheduler-2 1/1 Running 0 17m +dragonfly-seed-client-0 1/1 Running 0 17m +dragonfly-seed-client-1 1/1 Running 0 17m +dragonfly-seed-client-2 1/1 Running 0 17m +``` + +#### Expose the Proxy service port + +Create the `dfstore.yaml` configuration to expose the port on which the Dragonfly Peer's HTTP proxy listens. +The default port is `4001` and set`targetPort` to `4001`. 
+
+```yaml
+kind: Service
+apiVersion: v1
+metadata:
+  name: dfstore
+spec:
+  selector:
+    app: dragonfly
+    component: client
+    release: dragonfly
+
+  ports:
+    - protocol: TCP
+      port: 4001
+      targetPort: 4001
+
+  type: NodePort
+```
+
+Create service:
+
+```shell
+kubectl --namespace dragonfly-system apply -f dfstore.yaml
+```
+
+Forward request to Dragonfly Peer's HTTP proxy:
+
+```shell
+kubectl --namespace dragonfly-system port-forward service/dfstore 4001:4001
+```
+
+### Install Dragonfly Endpoint plugin
+
+#### Set environment variables for Dragonfly Endpoint configuration
+
+Create `config.json` configuration, and set `DRAGONFLY_ENDPOINT_CONFIG` environment variable for `config.json` file path.
+
+```shell
+export DRAGONFLY_ENDPOINT_CONFIG=/etc/dragonfly-endpoint/config.json
+```
+
+The default configuration path is:
+
+- linux: `/etc/dragonfly-endpoint/config.json`
+- darwin: `~/.dragonfly-endpoint/config.json`
+
+#### Dragonfly Endpoint configuration
+
+Create the `config.json` configuration to configure the Dragonfly Endpoint for S3, the configuration is as follows:
+
+> Notice: Replace the `addr` address with your actual address.
+
+```json
+{
+  "addr": "http://127.0.0.1:4001",
+  "header": {},
+  "filter": [
+    "X-Amz-Algorithm",
+    "X-Amz-Credential",
+    "X-Amz-Date",
+    "X-Amz-Expires",
+    "X-Amz-SignedHeaders",
+    "X-Amz-Signature"
+  ],
+  "object_storage": {
+    "type": "s3",
+    "bucket_name": "your_s3_bucket_name",
+    "region": "your_s3_region",
+    "access_key": "your_s3_access_key",
+    "secret_key": "your_s3_secret_key"
+  }
+}
+```
+
+- addr: The address of Dragonfly's Peer HTTP proxy.
+- header: Adds a request header to the request.
+- filter: Used to generate unique tasks and filter unnecessary query parameters in the URL.
+- object_storage: The object storage configuration, where type can be s3, oss, abs and gcs.
+ +In the filter of the configuration, set different values when using different object storage: + +| Type | Value | +| ---- | ---------------------------------------------------------------------------------------------------------- | +| OSS | "Expires&Signature" | +| S3 | "X-Amz-Algorithm&X-Amz-Credential&X-Amz-Date&X-Amz-Expires&X-Amz-SignedHeaders&X-Amz-Signature" | +| OBS | "X-Amz-Algorithm&X-Amz-Credential&X-Amz-Date&X-Obs-Date&X-Amz-Expires&X-Amz-SignedHeaders&X-Amz-Signature" | + +##### Object storage configuration + +In addition to S3, Dragonfly Endpoint plugin also supports OSS, GCS and ABS. +Different object storage configurations are as follows: + +> Notice: Replace the `addr` address with your actual address. + +OSS(Object Storage Service) + +```json +{ + "addr": "http://127.0.0.1:4001", + "header": {}, + "filter": ["Expires", "Signature"], + "object_storage": { + "type": "oss", + "bucket_name": "your_oss_bucket_name", + "endpoint": "your_oss_endpoint", + "access_key_id": "your_oss_access_key_id", + "access_key_secret": "your_oss_access_key_secret" + } +} +``` + +GCS(Google Cloud Storage) + +```json +{ + "addr": "http://127.0.0.1:4001", + "header": {}, + "object_storage": { + "type": "gcs", + "bucket_name": "your_gcs_bucket_name", + "project_id": "your_gcs_project_id", + "service_account_path": "your_gcs_service_account_path" + } +} +``` + +ABS(Azure Blob Storage) + +```json +{ + "addr": "http://127.0.0.1:4001", + "header": {}, + "object_storage": { + "type": "abs", + "account_name": "your_abs_account_name", + "account_key": "your_abs_account_key", + "container_name": "your_abs_container_name" + } +} +``` + +### TorchServe integrates Dragonfly Endpoint plugin + +For detailed installation documentation, please refer to [TorchServe document](https://pytorch.org/serve/). 
+
+#### Binary installation
+
+##### Plugin Prerequisites
+
+| Name       | Version | Document                                                                     |
+| ---------- | ------- | ---------------------------------------------------------------------------- |
+| Python     | 3.8.0+  | [https://www.python.org/](https://www.python.org/)                           |
+| TorchServe | 0.4.0+  | [pytorch.org/serve/](https://pytorch.org/serve/)                             |
+| Java       | 11      | [https://openjdk.org/projects/jdk/11/](https://openjdk.org/projects/jdk/11/) |
+
+Install TorchServe dependencies and torch-model-archiver:
+
+```shell
+python ./ts_scripts/install_dependencies.py
+conda install torchserve torch-model-archiver torch-workflow-archiver -c pytorch
+```
+
+Clone TorchServe repository:
+
+```shell
+git clone https://github.com/pytorch/serve.git
+cd serve
+```
+
+Create [model-store](https://pytorch.org/serve/getting_started.html?highlight=model+store) directory to store the models:
+
+```shell
+mkdir model-store
+chmod 777 model-store
+```
+
+Create [plugins-path](https://github.com/pytorch/serve/tree/master/plugins/docs) directory to store the binaries of the plugin:
+
+```shell
+mkdir plugins-path
+```
+
+#### Package Dragonfly Endpoint plugin
+
+Clone dragonfly-endpoint repository:
+
+```shell
+git clone https://github.com/dragonflyoss/dragonfly-endpoint.git
+```
+
+Build the dragonfly-endpoint project to generate file in the `build/libs` directory:
+
+```shell
+cd ./dragonfly-endpoint
+gradle shadowJar
+```
+
+**Note:** Due to the limitations of TorchServe's JVM, the best Java version for Gradle is 11,
+as a higher version will cause the plugin to fail to parse.
+
+Move the Jar file into the plugins-path directory:
+
+```shell
+mv build/libs/dragonfly_endpoint-1.0-all.jar <plugins-path>
+```
+
+Prepare the plugin configuration `config.json`, and use S3 as the object storage:
+
+> Notice: Replace the `addr` address with your actual address.
+
+```shell
+{
+  "addr": "http://127.0.0.1:4001",
+  "header": {
+  },
+  "filter": [
+    "X-Amz-Algorithm",
+    "X-Amz-Credential",
+    "X-Amz-Date",
+    "X-Amz-Expires",
+    "X-Amz-SignedHeaders",
+    "X-Amz-Signature"
+  ],
+  "object_storage": {
+    "type": "s3",
+    "bucket_name": "your_s3_bucket_name",
+    "region": "your_s3_region",
+    "access_key": "your_s3_access_key",
+    "secret_key": "your_s3_secret_key"
+  }
+}
+```
+
+Set the environment variables for the configuration:
+
+```shell
+export DRAGONFLY_ENDPOINT_CONFIG=/etc/dragonfly-endpoint/config.json
+```
+
+`--model-store` sets the previously created directory to store the models and `--plugins-path` sets
+the previously created directory to store the plugins. Start the TorchServe with Dragonfly Endpoint plugin:
+
+```shell
+torchserve --start --model-store <model-store> --plugins-path=<plugins-path>
+```
+
+#### Verify TorchServe binary
+
+Prepare the model. Download a model from [Model ZOO](https://pytorch.org/serve/model_zoo.html#) or
+package the model refer to [Torch Model archiver for TorchServe](https://github.com/pytorch/serve/tree/master/model-archiver).
+Use `squeezenet1_1_scripted.mar` model to verify:
+
+```shell
+wget https://torchserve.pytorch.org/mar_files/squeezenet1_1_scripted.mar
+```
+
+Upload the model to object storage. For detailed uploading the model to S3, please refer to [S3](https://aws.amazon.com/s3/?nc1=h_ls).
+
+```shell
+# Download the command line tool
+pip install awscli
+# Configure the key as prompted
+aws configure
+# Upload file
+aws s3 cp < local file path > s3://< bucket name >/< Target path >
+```
+
+TorchServe plugin is named Dragonfly, please refer to [TorchServe Register API](https://pytorch.org/serve/management_api.html#register-a-model)
+for details of plugin API. The `url` parameter is not supported and add the `file_name`
+parameter which is the model file name to download.
+
+Download the model: + +```shell +curl -X POST "http://localhost:8081/dragonfly/models?file_name=squeezenet1_1.mar" +``` + +Verify the model download successful: + + + +```shell +{ + "Status": "Model "squeezenet1_1" Version: 1.0 registered with 0 initial workers. Use scale workers API to add workers for the model." +} +``` + + + +Added model worker for inference: + +```shell +curl -v -X PUT "http://localhost:8081/models/squeezenet1_1?min_worker=1" +``` + +Check the number of worker is increased: + +```shell +* About to connect() to localhost port 8081 (#0) +* Trying ::1... +* Connected to localhost (::1) port 8081 (#0) +> PUT /models/squeezenet1_1?min_worker=1 HTTP/1.1 +> User-Agent: curl/7.29.0 +> Host: localhost:8081 +> Accept: */* +> +< HTTP/1.1 202 Accepted +< content-type: application/json +< x-request-id: 66761b5a-54a7-4626-9aa4-12041e0e4e63 +< Pragma: no-cache +< Cache-Control: no-cache; no-store, must-revalidate, private +< Expires: Thu, 01 Jan 1970 00:00:00 UTC +< content-length: 47 +< connection: keep-alive +< +{ + "status": "Processing worker updates..." +} +* Connection #0 to host localhost left intact +``` + +Call inference API: + +```shell +# Prepare pictures that require reasoning. +curl -O https://raw.githubusercontent.com/pytorch/serve/master/docs/images/kitten_small.jpg +curl -O https://raw.githubusercontent.com/pytorch/serve/master/docs/images/dogs-before.jpg + +# Call inference API. +curl http://localhost:8080/predictions/squeezenet1_1 -T kitten_small.jpg -T dogs-before.jpg +``` + +Check the response successful: + +```shell +{ + "lynx": 0.5455784201622009, + "tabby": 0.2794168293476105, + "Egyptian_cat": 0.10391931980848312, + "tiger_cat": 0.062633216381073, + "leopard": 0.005019133910536766 +} +``` + +#### Install TorchServe with Docker + +##### Docker configuration + +Pull `dragonflyoss/dragonfly-endpoint` image with the plugin. 
The following is an +example of the CPU version of TorchServe, refer to [Dockerfile](https://github.com/dragonflyoss/dragonfly-endpoint/blob/main/images/Dockerfile). + +```shell +docker pull dragonflyoss/dragonfly-endpoint +``` + +Create [model-store](https://pytorch.org/serve/getting_started.html?highlight=model+store) directory to store the model files: + +```shell +mkdir model-store +chmod 777 model-store +``` + +Prepare the plugin configuration `config.json`, and use S3 as the object storage: + +> Notice: Replace the `addr` address with your actual address. + +```shell +{ + "addr": "http://127.0.0.1:4001", + "header": { + }, + "filter": [ + "X-Amz-Algorithm", + "X-Amz-Credential", + "X-Amz-Date", + "X-Amz-Expires", + "X-Amz-SignedHeaders", + "X-Amz-Signature" + ], + "object_storage": { + "type": "s3", + "bucket_name": "your_s3_bucket_name", + "region": "your_s3_access_key", + "access_key": "your_s3_access_key", + "secret_key": "your_s3_access_key" + } +} +``` + +Set the environment variables for the configuration: + +```shell +export DRAGONFLY_ENDPOINT_CONFIG=/etc/dragonfly-endpoint/config.json +``` + +Mount the `model-store` and dragonfly-endpoint configuration directory. Run the container: + + + +```shell +# Environment variable configuration path. +sudo docker run --rm -it --network host -v $(pwd)/model-store:/home/model-server/model-store -v ${DRAGONFLY_ENDPOINT_CONFIG}:${DRAGONFLY_ENDPOINT_CONFIG} dragonflyoss/dragonfly-endpoint:latest +``` + + + +#### Verify TorchServe container + +Prepare the model. Download a model from [Model ZOO](https://pytorch.org/serve/model_zoo.html#) or +package the model refer to [Torch Model archiver for TorchServe](https://github.com/pytorch/serve/tree/master/model-archiver). +Use `squeezenet1_1_scripted.mar` model to verify: + +```shell +wget https://torchserve.pytorch.org/mar_files/squeezenet1_1_scripted.mar +``` + +Upload the model to object storage. 
For detailed uploading the model to S3, please refer to [S3](https://aws.amazon.com/s3/?nc1=h_ls)。 + +```shell +# Download the command line tool +pip install awscli +# Configure the key as prompted +aws configure +# Upload file +aws s3 cp < local file path > s3://< bucket name >/< Target path > +``` + +TorchServe plugin is named Dragonfly, please refer to [TorchServe Register API](https://pytorch.org/serve/management_api.html#register-a-model) +for details of plugin API. The `url` parameter are not supported and add the `file_name` +parameter which is the model file name to download. +Download a model: + +```shell +curl -X POST "http://localhost:8081/dragonfly/models?file_name=squeezenet1_1.mar" +``` + +Verify the model download successful: + + + +```shell +{ + "Status": "Model "squeezenet1_1" Version: 1.0 registered with 0 initial workers. Use scale workers API to add workers for the model." +} +``` + + + +Added model worker for inference: + +```shell +curl -v -X PUT "http://localhost:8081/models/squeezenet1_1?min_worker=1" +``` + +Check the number of worker is increased: + +```shell +* About to connect() to localhost port 8081 (#0) +* Trying ::1... +* Connected to localhost (::1) port 8081 (#0) +> PUT /models/squeezenet1_1?min_worker=1 HTTP/1.1 +> User-Agent: curl/7.29.0 +> Host: localhost:8081 +> Accept: */* +> +< HTTP/1.1 202 Accepted +< content-type: application/json +< x-request-id: 66761b5a-54a7-4626-9aa4-12041e0e4e63 +< Pragma: no-cache +< Cache-Control: no-cache; no-store, must-revalidate, private +< Expires: Thu, 01 Jan 1970 00:00:00 UTC +< content-length: 47 +< connection: keep-alive +< +{ + "status": "Processing worker updates..." +} +* Connection #0 to host localhost left intact +``` + +Call inference API: + +```shell +# Prepare pictures that require reasoning. 
+curl -O https://raw.githubusercontent.com/pytorch/serve/master/docs/images/kitten_small.jpg +curl -O https://raw.githubusercontent.com/pytorch/serve/master/docs/images/dogs-before.jpg + +# Call inference API. +curl http://localhost:8080/predictions/squeezenet1_1 -T kitten_small.jpg -T dogs-before.jpg +``` + +Check the response successful: + +```shell +{ + "lynx": 0.5455784201622009, + "tabby": 0.2794168293476105, + "Egyptian_cat": 0.10391931980848312, + "tiger_cat": 0.062633216381073, + "leopard": 0.005019133910536766 +} +``` + +## Performance testing + +Test the performance of single-machine model download by TorchServe API after the integration of Dragonfly P2P. +Due to the influence of the network environment of the machine itself, the actual download time is not important, +but the ratio of the increase in the download time in different scenarios is very important. + +![torchserve-dragonfly](../../resource/operations/integrations/torchserve-dragonfly.png) + +- TorchServe API: Use signed URL provided by Object Storage to download the model directly. +- TorchServe API & Dragonfly Cold Boot: Use `TorchServe API` to download model via Dragonfly P2P network and no cache hits. +- Hit Remote Peer: Use `TorchServe API` to download model via Dragonfly P2P network and hit the remote peer cache. +- Hit Local Peer: Use `TorchServe API` to download model via Dragonfly P2P network and hit the local peer cache. + +Test results show TorchServe and Dragonfly integration. It can effectively reduce the file download time. +Note that this test was a single-machine test, which means that in the case of cache hits, +the performance limitation is on the disk. If Dragonfly is deployed on multiple machines for +P2P download, the models download speed will be faster. 
diff --git a/versioned_docs/version-v2.2.0/operations/integrations/triton-server.md b/versioned_docs/version-v2.2.0/operations/integrations/triton-server.md new file mode 100644 index 00000000..a69fcbce --- /dev/null +++ b/versioned_docs/version-v2.2.0/operations/integrations/triton-server.md @@ -0,0 +1,497 @@ +--- +id: triton-server +title: Triton Server +slug: /operations/integrations/triton-server/ +--- + +This document will help you experience how to use Dragonfly with [Triton Server](https://github.com/triton-inference-server/server). +During the downloading of models, the file size is large and there are many services downloading the files at the same time. +The bandwidth of the storage will reach the limit and the download will be slow. + +![triton-server-download](../../resource/operations/integrations/triton-server-download.png) + +Dragonfly can be used to eliminate the bandwidth limit of the storage through P2P technology, thereby accelerating file downloading. + +![triton-server-p2p](../../resource/operations/integrations/triton-server-p2p.png) + +## Installation + +By integrating Dragonfly Repository Agent into Triton, download traffic through Dragonfly to +pull models stored in S3, OSS, GCS, and ABS, and register models in Triton. The Dragonfly Repository Agent is in +the [dragonfly-repository-agent](https://github.com/dragonflyoss/dragonfly-repository-agent) repository. + +### Prerequisites + +| Name | Version | Document | | ------------------ | --------- | ------------------------------------------------------------------ | | Kubernetes cluster | 1.20+ | [kubernetes.io](https://kubernetes.io/) | | Helm | 3.8.0+ | [helm.sh](https://helm.sh/) | | Triton Server | 23.08-py3 | [Triton Server](https://github.com/triton-inference-server/server) | + +### Dragonfly Kubernetes Cluster Setup + +For detailed installation documentation, please refer to [quick-start-kubernetes](../../getting-started/quick-start/kubernetes.md).
+ +#### Prepare Kubernetes Cluster + +[Kind](https://kind.sigs.k8s.io/) is recommended if no kubernetes cluster is available for testing. + +Create kind multi-node cluster configuration file `kind-config.yaml`, configuration content is as follows: + +```yaml +kind: Cluster +apiVersion: kind.x-k8s.io/v1alpha4 +nodes: + - role: control-plane + - role: worker + - role: worker +``` + +Create a kind multi-node cluster using the configuration file: + +```shell +kind create cluster --config kind-config.yaml +``` + +Switch the context of kubectl to kind cluster: + +```shell +kubectl config use-context kind-kind +``` + +#### Kind loads Dragonfly image + +Pull Dragonfly latest images: + +```shell +docker pull dragonflyoss/scheduler:latest +docker pull dragonflyoss/manager:latest +docker pull dragonflyoss/client:latest +``` + +Kind cluster loads Dragonfly latest images: + +```shell +kind load docker-image dragonflyoss/scheduler:latest +kind load docker-image dragonflyoss/manager:latest +kind load docker-image dragonflyoss/client:latest +``` + +#### Create Dragonfly cluster based on helm charts + +Create helm charts configuration file `charts-config.yaml` and set +`client.config.proxy.rules.regex` to match the download path of the object storage. +Example: add `regex:.*models.*` to match download request from object storage bucket `models`. 
+Configuration content is as follows: + +```yaml +manager: + image: + repository: dragonflyoss/manager + tag: latest + metrics: + enable: true + config: + verbose: true + pprofPort: 18066 + +scheduler: + image: + repository: dragonflyoss/scheduler + tag: latest + metrics: + enable: true + config: + verbose: true + pprofPort: 18066 + +seedClient: + image: + repository: dragonflyoss/client + tag: latest + metrics: + enable: true + config: + verbose: true + +client: + image: + repository: dragonflyoss/client + tag: latest + metrics: + enable: true + config: + verbose: true + proxy: + server: + port: 4001 + registryMirror: + addr: https://index.docker.io + rules: + - regex: 'blobs/sha256.*' + # Proxy all http download requests of model bucket path. + - regex: '.*models.*' +``` + +Create a Dragonfly cluster using the configuration file: + + + +```shell +$ helm repo add dragonfly https://dragonflyoss.github.io/helm-charts/ +$ helm install --wait --create-namespace --namespace dragonfly-system dragonfly dragonfly/dragonfly -f charts-config.yaml +LAST DEPLOYED: Mon June 27 19:56:34 2024 +NAMESPACE: dragonfly-system +STATUS: deployed +REVISION: 1 +TEST SUITE: None +NOTES: +1. Get the scheduler address by running these commands: + export SCHEDULER_POD_NAME=$(kubectl get pods --namespace dragonfly-system -l "app=dragonfly,release=dragonfly,component=scheduler" -o jsonpath={.items[0].metadata.name}) + export SCHEDULER_CONTAINER_PORT=$(kubectl get pod --namespace dragonfly-system $SCHEDULER_POD_NAME -o jsonpath="{.spec.containers[0].ports[0].containerPort}") + kubectl --namespace dragonfly-system port-forward $SCHEDULER_POD_NAME 8002:$SCHEDULER_CONTAINER_PORT + echo "Visit http://127.0.0.1:8002 to use your scheduler" + +2.
Get the dfdaemon port by running these commands: + export DFDAEMON_POD_NAME=$(kubectl get pods --namespace dragonfly-system -l "app=dragonfly,release=dragonfly,component=dfdaemon" -o jsonpath={.items[0].metadata.name}) + export DFDAEMON_CONTAINER_PORT=$(kubectl get pod --namespace dragonfly-system $DFDAEMON_POD_NAME -o jsonpath="{.spec.containers[0].ports[0].containerPort}") + You can use $DFDAEMON_CONTAINER_PORT as a proxy port in Node. + +3. Configure runtime to use dragonfly: + https://d7y.io/docs/getting-started/quick-start/kubernetes/ + + +4. Get Jaeger query URL by running these commands: + export JAEGER_QUERY_PORT=$(kubectl --namespace dragonfly-system get services dragonfly-jaeger-query -o jsonpath="{.spec.ports[0].port}") + kubectl --namespace dragonfly-system port-forward service/dragonfly-jaeger-query 16686:$JAEGER_QUERY_PORT + echo "Visit http://127.0.0.1:16686/search?limit=20&lookback=1h&maxDuration&minDuration&service=dragonfly to query download events" +``` + + + +Check that Dragonfly is deployed successfully: + +```shell +$ kubectl get pods -n dragonfly-system +NAME READY STATUS RESTARTS AGE +dragonfly-client-qhkn8 1/1 Running 0 21m3s +dragonfly-client-qzcz9 1/1 Running 0 21m3s +dragonfly-manager-6bc4454d94-ldsk7 1/1 Running 0 21m3s +dragonfly-mysql-0 1/1 Running 0 21m3s +dragonfly-redis-master-0 1/1 Running 0 21m3s +dragonfly-redis-replicas-0 1/1 Running 0 21m3s +dragonfly-redis-replicas-1 1/1 Running 0 21m3s +dragonfly-redis-replicas-2 1/1 Running 0 21m3s +dragonfly-scheduler-0 1/1 Running 0 21m3s +dragonfly-scheduler-1 1/1 Running 0 21m3s +dragonfly-scheduler-2 1/1 Running 0 21m3s +dragonfly-seed-client-0 1/1 Running 0 21m3s +dragonfly-seed-client-1 1/1 Running 0 21m3s +dragonfly-seed-client-2 1/1 Running 0 21m3s +``` + +#### Expose the Proxy service port + +Create the `dfstore.yaml` configuration file to expose the port on which the +Dragonfly Peer's HTTP proxy listens. The default port is `4001` and set`targetPort` to `4001`. 
+ +```yaml +kind: Service +apiVersion: v1 +metadata: + name: dfstore +spec: + selector: + app: dragonfly + component: client + release: dragonfly + + ports: + - protocol: TCP + port: 4001 + targetPort: 4001 + + type: NodePort +``` + +Create service: + +```shell +kubectl --namespace dragonfly-system apply -f dfstore.yaml +``` + +Forward request to Dragonfly Peer's HTTP proxy: + +```shell +kubectl --namespace dragonfly-system port-forward service/dfstore 4001:4001 +``` + +### Install Dragonfly Repository Agent + +#### Set Dragonfly Repository Agent configuration + +Create the `dragonfly_config.json` configuration file, the configuration is as follows: + +> Notice: Replace the `addr` address with your actual address. + +```shell +{ + "proxy": "http://127.0.0.1:4001", + "header": { + }, + "filter": [ + "X-Amz-Algorithm", + "X-Amz-Credential", + "X-Amz-Date", + "X-Amz-Expires", + "X-Amz-SignedHeaders", + "X-Amz-Signature" + ] +} +``` + +- proxy: The address of Dragonfly Peer's HTTP Proxy. +- header: Adds a request header to the request. +- filter: Used to generate unique tasks and filter unnecessary query parameters in the URL.
+ +In the filter of the configuration, set different values when using different object storage: + + + +| type | value | +| ---- | ------------------------------------------------------------------------------------------------------------------------------ | +| OSS | ["Expires","Signature","ns"] | +| S3 | ["X-Amz-Algorithm", "X-Amz-Credential", "X-Amz-Date", "X-Amz-Expires", "X-Amz-SignedHeaders", "X-Amz-Signature"] | +| OBS | ["X-Amz-Algorithm", "X-Amz-Credential", "X-Amz-Date", "X-Obs-Date", "X-Amz-Expires", "X-Amz-SignedHeaders", "X-Amz-Signature"] | + + + +#### Set Model Repository configuration + +Create `cloud_credential.json` cloud storage credential, the configuration is as follows: + +```shell +{ + "gs": { + "": "PATH_TO_GOOGLE_APPLICATION_CREDENTIALS", + "gs://gcs-bucket-002": "PATH_TO_GOOGLE_APPLICATION_CREDENTIALS_2" + }, + "s3": { + "": { + "secret_key": "AWS_SECRET_ACCESS_KEY", + "key_id": "AWS_ACCESS_KEY_ID", + "region": "AWS_DEFAULT_REGION", + "session_token": "", + "profile": "" + }, + "s3://s3-bucket-002": { + "secret_key": "AWS_SECRET_ACCESS_KEY_2", + "key_id": "AWS_ACCESS_KEY_ID_2", + "region": "AWS_DEFAULT_REGION_2", + "session_token": "AWS_SESSION_TOKEN_2", + "profile": "AWS_PROFILE_2" + } + }, + "as": { + "": { + "account_str": "AZURE_STORAGE_ACCOUNT", + "account_key": "AZURE_STORAGE_KEY" + }, + "as://Account-002/Container": { + "account_str": "", + "account_key": "" + } + } +} +``` + +In order to pull the model through Dragonfly, the model configuration file needs to +be added following code in `config.pbtxt` file: + +```shell +model_repository_agents +{ + agents [ + { + name: "dragonfly", + } + ] +} +``` + +The [densenet_onnx example](https://github.com/dragonflyoss/dragonfly-repository-agent/tree/main/examples/model_repository/densenet_onnx) +contains modified configuration and model file. 
Modified `config.pbtxt` such as: + +```shell +name: "densenet_onnx" +platform: "onnxruntime_onnx" +max_batch_size : 0 +input [ + { + name: "data_0" + data_type: TYPE_FP32 + format: FORMAT_NCHW + dims: [ 3, 224, 224 ] + reshape { shape: [ 1, 3, 224, 224 ] } + } +] +output [ + { + name: "fc6_1" + data_type: TYPE_FP32 + dims: [ 1000 ] + reshape { shape: [ 1, 1000, 1, 1 ] } + label_filename: "densenet_labels.txt" + } +] +model_repository_agents +{ + agents [ + { + name: "dragonfly", + } + ] +} +``` + +### Triton Server integrates Dragonfly Repository Agent plugin + +#### Install Triton Server with Docker + +Pull `dragonflyoss/dragonfly-repository-agent` image which is integrated Dragonfly Repository Agent plugin +in Triton Server, refer to [Dockerfile](https://github.com/dragonflyoss/dragonfly-repository-agent/blob/main/Dockerfile). + +```shell +docker pull dragonflyoss/dragonfly-repository-agent:latest +``` + +Run the container and mount the configuration directory: + +```shell +docker run --network host --rm \ + -v ${path-to-config-dir}:/home/triton/ \ + dragonflyoss/dragonfly-repository-agent:latest tritonserver \ + --model-repository=${model-repository-path} +``` + +- `path-to-config-dir`: The files path of `dragonfly_config.json`&`cloud_credential.json`. +- `model-repository-path`: The path of remote model repository. 
+ +The correct output is as follows: + + + +```shell +============================= +== Triton Inference Server == +============================= +successfully loaded 'densenet_onnx' +I1130 09:43:22.595672 1 server.cc:604] ++------------------+------------------------------------------------------------------------+ +| Repository Agent | Path | ++------------------+------------------------------------------------------------------------+ +| dragonfly | /opt/tritonserver/repoagents/dragonfly/libtritonrepoagent_dragonfly.so | ++------------------+------------------------------------------------------------------------+ + +I1130 09:43:22.596011 1 server.cc:631] ++-------------+-----------------------------------------------------------------+---------------------------------------------------------------------------------------------------------------------------------------------------------------+ +| Backend | Path | Config | ++-------------+-----------------------------------------------------------------+---------------------------------------------------------------------------------------------------------------------------------------------------------------+ +| pytorch | /opt/tritonserver/backends/pytorch/libtriton_pytorch.so | {} | +| onnxruntime | /opt/tritonserver/backends/onnxruntime/libtriton_onnxruntime.so | {"cmdline":{"auto-complete-config":"true","backend-directory":"/opt/tritonserver/backends","min-compute-capability":"6.000000","default-max-batch-size":"4"}} | ++-------------+-----------------------------------------------------------------+---------------------------------------------------------------------------------------------------------------------------------------------------------------+ + +I1130 09:43:22.596112 1 server.cc:674] ++---------------+---------+--------+ +| Model | Version | Status | ++---------------+---------+--------+ +| densenet_onnx | 1 | READY | ++---------------+---------+--------+ + +I1130 09:43:22.598318 1 
metrics.cc:703] Collecting CPU metrics +I1130 09:43:22.599373 1 tritonserver.cc:2435] ++----------------------------------+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ +| Option | Value | ++----------------------------------+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ +| server_id | triton | +| server_version | 2.37.0 | +| server_extensions | classification sequence model_repository model_repository(unload_dependents) schedule_policy model_configuration system_shared_memory cuda_shared_memory binary_tensor_data parameters statistics trace logging | +| model_repository_path[0] | s3://192.168.36.128:9000/models | +| model_control_mode | MODE_NONE | +| strict_model_config | 0 | +| rate_limit | OFF | +| pinned_memory_pool_byte_size | 268435456 | +| min_supported_compute_capability | 6.0 | +| strict_readiness | 1 | +| exit_timeout | 30 | +| cache_enabled | 0 | ++----------------------------------+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ + +I1130 09:43:22.610334 1 grpc_server.cc:2451] Started GRPCInferenceService at 0.0.0.0:8001 +I1130 09:43:22.612623 1 http_server.cc:3558] Started HTTPService at 0.0.0.0:8000 +I1130 09:43:22.695843 1 http_server.cc:187] Started Metrics Service at 0.0.0.0:8002 +``` + + + +Execute the following command to check the Dragonfly logs: + +```shell +# Find pod name. 
+export POD_NAME=$(kubectl get pods --namespace dragonfly-system -l "app=dragonfly,release=dragonfly, +component=client" -o=jsonpath='{.items[?(@.spec.nodeName=="kind-worker")].metadata.name}' | head -n 1 ) + +# Check logs. +kubectl -n dragonfly-system exec -it ${POD_NAME} -- grep "download task succeeded" /var/log/dragonfly/dfdaemon/* +``` + +The expected output is as follows: + +```shell +{ + 2024-04-19T02:44:09.259458Z "INFO" + "download_task":"dragonfly-client/src/grpc/dfdaemon_download.rs:276":: "download task succeeded" + "host_id": "172.18.0.3-kind-worker", + "task_id": "a46de92fcb9430049cf9e61e267e1c3c9db1f1aa4a8680a048949b06adb625a5", + "peer_id": "172.18.0.3-kind-worker-86e48d67-1653-4571-bf01-7e0c9a0a119d" +} +``` + +#### Verify + +Call inference API: + + + +```shell +docker run -it --rm --net=host nvcr.io/nvidia/tritonserver:23.08-py3-sdk /workspace/install/bin/image_client -m densenet_onnx -c 3 -s INCEPTION /workspace/images/mug.jpg +``` + + + +Check the response successful: + +```shell +Request 01 +Image '/workspace/images/mug.jpg': + 15.349563 (504) = COFFEE MUG + 13.227461 (968) = CUP + 10.424893 (505) = COFFEEPOT +``` + +## Performance testing + +Test the performance of single-machine model download by Triton API after the integration of Dragonfly P2P. +Due to the influence of the network environment of the machine itself, the actual download time is not important, +but the proportion of download speed in different scenarios is more meaningful: + +![triton-server-dragonfly](../../resource/operations/integrations/triton-server-dragonfly.png) + +- Triton API: Use signed URL provided by Object Storage to download the model directly. +- Triton API & Dragonfly Cold Boot: Use `Triton Serve API` to download model via Dragonfly P2P network and no cache hits. +- Hit Remote Peer: Use `Triton Serve API` to download model via Dragonfly P2P network and hit the remote peer cache.
+- Hit Local Peer: Use `Triton Serve API` to download model via Dragonfly P2P network and hit the local peer cache. + +Test results show Triton and Dragonfly integration. It can effectively reduce the file download time. +Note that this test was a single-machine test, which means that in the case of cache hits, +the performance limitation is on the disk. If Dragonfly is deployed on multiple machines for P2P download, +the models download speed will be faster. diff --git a/versioned_docs/version-v2.2.0/operations/integrations/upgrade.md b/versioned_docs/version-v2.2.0/operations/integrations/upgrade.md new file mode 100644 index 00000000..b630da3a --- /dev/null +++ b/versioned_docs/version-v2.2.0/operations/integrations/upgrade.md @@ -0,0 +1,55 @@ +--- +id: upgrade +title: Upgrade +slug: /operations/integrations/upgrade/ +--- + +## Upgrade the cluster deployed by Helm {#upgrade-the-cluster-deployed-by-helm} + +User can deploy a dragonfly cluster on kubernetes with Helm. +The [helm chart](https://github.com/dragonflyoss/helm-charts) is a project managed by dragonfly Team. +User can query and download the latest version chart or history version +from [Artifact Hub](https://artifacthub.io/packages/helm/dragonfly/dragonfly). + +Before Upgrade, user must read the [Change Log](https://github.com/dragonflyoss/dragonfly/blob/main/CHANGELOG.md) to +make sure the breaking changes between the current version and target version. + +```shell script +# check the dragonfly repo existence +helm repo list | grep dragonfly + +# [Optional] add repo if not exist +helm repo add dragonfly https://dragonflyoss.github.io/helm-charts/ + +# update locally cached repo information +helm repo update + +# upgrade the dragonfly +helm upgrade --install -n dragonfly-system dragonfly dragonfly/dragonfly [--version 0.5.50] [-f values.yaml] +``` + +**Note:** + +1. 
On the above example, `dragonfly/dragonfly` means `dragonfly` release under `dragonfly` repo, + `0.5.50` is the upgrading target version,user can specify the version as you want. +2. If user need specify extra parameters, user can edit the `values.yaml` you configured for the old release and + specify with `-f values.yaml`. +3. If you want to drop the chart parameters you configured for the old release or set some new parameters, + it is recommended to add `--reset-values` flag in helm upgrade command. +4. When upgrading, If you want to reuse the last release's values, it is recommended to add `--reuse-values` flag + in helm upgrade command. +5. More information about `helm upgrade` sub-command + can be found in [helm home page](https://helm.sh/docs/helm/helm_upgrade/). +6. For those users can't fetch the chart from remote repo, follow this step: + + ```shell script + # download dragonfly helm chart from github source repo. use version 0.5.50 as an example + + # method 1: + wget https://github.com/dragonflyoss/helm-charts/releases/download/dragonfly-0.5.50/dragonfly-0.5.50.tgz + # method 2: + git clone -b dragonfly-0.5.50 --depth=1 https://github.com/dragonflyoss/helm-charts.git + + # upgrade the dragonfly + helm upgrade --install -n dragonfly-system dragonfly [-f values.yaml | --reset-values] + ``` diff --git a/versioned_docs/version-v2.2.0/reference/commands/client/dfdaemon.md b/versioned_docs/version-v2.2.0/reference/commands/client/dfdaemon.md new file mode 100644 index 00000000..eefcc354 --- /dev/null +++ b/versioned_docs/version-v2.2.0/reference/commands/client/dfdaemon.md @@ -0,0 +1,154 @@ +--- +id: dfdaemon +title: Dfdaemon +slug: /reference/commands/client/dfdaemon/ +--- + +A high performance P2P download daemon in Dragonfly that can download resources of different protocols. +When user triggers a file downloading task, dfdaemon will download the pieces of file from other peers. 
+Meanwhile, it will act as an uploader to support other peers to download pieces from it if it owns them. + +## Options {#dfdaemon-options} + + + +```text + -c, --config + Specify config file to use + + [default: /etc/dragonfly/dfdaemon.yaml] + + --lock-path + Specify the lock file path + + [default: /var/lock/dragonfly/dfdaemon.lock] + + -l, --log-level + Specify the logging level [trace, debug, info, warn, error] + + [default: info] + + --log-dir + Specify the log directory + + [default: /var/log/dragonfly/dfdaemon] + + --log-max-files + Specify the max number of log files + + [default: 24] + + --verbose + Specify whether to print log + + -h, --help + Print help (see a summary with '-h') + + -V, --version + Print version +``` + + + +## Example + +### Download with Proxy + +When the dfdameon setups, it setups a HTTP proxy. Users can download traffic is proxied to P2P networks via the HTTP Proxy. + +#### Download with HTTP protocol + +Configure `dfdaemon.yaml`, the default path is `/etc/dragonfly/dfdaemon.yaml`, +refer to [Dfdaemon](../../configuration/client/dfdaemon.md). + +> Notice: set `proxy.rules.regex` to match the download path. +If the regex matches, intercepts download traffic and forwards it to the P2P network. + +```yaml +proxy: + server: + port: 4001 + rules: + - regex: 'example.*' +``` + +```shell +curl -v -x 127.0.0.1:4001 http://example.com/file.txt --output /tmp/file.txt +``` + +#### Download with HTTPS protocol + +##### Download with Insecure HTTPS protocol + +Configure `dfdaemon.yaml`, the default path is `/etc/dragonfly/dfdaemon.yaml`, +refer to [Dfdaemon](../../configuration/client/dfdaemon.md). + +> Notice: set `proxy.rules.regex` to match the download path. +If the regex matches, intercepts download traffic and forwards it to the P2P network. 
+ +```yaml +proxy: + server: + port: 4001 + rules: + - regex: 'example.*' +``` + +Download with Insecure HTTPS protocol: + +```shell +curl -v -x 127.0.0.1:4001 https://example.com/file.txt --insecure --output /tmp/file.txt +``` + +##### Download with HTTPS protocol by using custom CA certificates + +Generate a CA certificates: + +```shell +openssl req -x509 -sha256 -days 36500 -nodes -newkey rsa:4096 -keyout ca.key -out ca.crt +``` + +Trust the certificate at the OS level. + +- Ubuntu: + +```shell +cp ca.crt /usr/local/share/ca-certificates/ca.crt +update-ca-certificates +``` + +- Red Hat (CentOS etc): + +```shell +cp ca.crt /etc/pki/ca-trust/source/anchors/ca.crt +update-ca-trust +``` + +Configure `dfdaemon.yaml`, the default path is `/etc/dragonfly/dfdaemon.yaml`, +refer to [Dfdaemon](../../configuration/client/dfdaemon.md). + +> Notice: set `proxy.rules.regex` to match the download path. +If the regex matches, intercepts download traffic and forwards it to the P2P network. + +```yaml +proxy: + server: + port: 4001 + caCert: ca.crt + caKey: ca.key + rules: + - regex: 'example.*' +``` + +Download with HTTPS protocol: + +```shell +curl -v -x 127.0.0.1:4001 https://example.com/file.txt --output /tmp/file.txt +``` + +## Log {#log} + +```text +1. set option --verbose if you want to print logs to Terminal +2. log path: /var/log/dragonfly/dfdaemon/ +``` diff --git a/versioned_docs/version-v2.2.0/reference/commands/client/dfget.md b/versioned_docs/version-v2.2.0/reference/commands/client/dfget.md new file mode 100644 index 00000000..6cc62d72 --- /dev/null +++ b/versioned_docs/version-v2.2.0/reference/commands/client/dfget.md @@ -0,0 +1,227 @@ +--- +id: dfget +title: Dfget +slug: /reference/commands/client/dfget/ +--- + +`dfget` is the client of Dragonfly used to download and upload files. +The unix socket of the dfdaemon GRPC service needs to be used during the upload and download process of dfget. +To use dfget, dfdaemon must be started. 
+ +## Usage {#usage} + +dfget is the client of Dragonfly which takes +a role of peer in a P2P network. When user triggers a file downloading +task, dfget will download the pieces of +file from other peers. Meanwhile, it will act as an uploader to support other +peers to download pieces from it if it owns them. +In addition, dfget has the abilities to provide more advanced +functionality, such as network bandwidth limit, +transmission encryption and so on. + +```shell +dfget -O +dfget [command] +``` + +## Options {#options} + + + +```text +Usage: dfget [OPTIONS] --output + +Arguments: + + Specify the URL to download + +Options: + -O, --output + Specify the output path of downloading file + + -e, --endpoint + Endpoint of dfdaemon's GRPC server + + [default: /var/run/dragonfly/dfdaemon.sock] + + --timeout + Specify the timeout for downloading a file + + [default: 2h] + + --piece-length + Specify the byte length of the piece + + [default: 4194304] + + -d, --digest + Verify the integrity of the downloaded file using the specified digest, e.g. md5:86d3f3a95c324c9479bd8986968f4327 + + [default: ] + + -p, --priority + Specify the priority for scheduling task + + [default: 6] + + --application + Caller application which is used for statistics and access control + + [default: ] + + --tag + Different tags for the same url will be divided into different tasks + + [default: ] + + -H, --header
+ Specify the header for downloading file, e.g. --header='Content-Type: application/json' --header='Accept: application/json' + + --filtered-query-param + Filter the query parameters of the downloaded URL. If the download URL is the same, it will be scheduled as the same task, e.g. --filtered-query-param='signature' --filtered-query-param='timeout' + + --disable-back-to-source + Disable back-to-source download when dfget download failed + + --storage-region + Specify the region for the Object Storage Service + + --storage-endpoint + Specify the endpoint for the Object Storage Service + + --storage-access-key-id + Specify the access key ID for the Object Storage Service + + --storage-access-key-secret + Specify the access key secret for the Object Storage Service + + --storage-session-token + Specify the session token for Amazon Simple Storage Service(S3) + + --storage-credential-path + Specify the local path to credential file for Google Cloud Storage Service(GCS) + + --storage-predefined-acl + Specify the predefined ACL for Google Cloud Storage Service(GCS) + + [default: publicRead] + + --max-files + Specify the max count of file to download when downloading a directory. 
If the actual file count is greater than this value, the downloading will be rejected + + [default: 10] + + --max-concurrent-requests + Specify the max count of concurrent download files when downloading a directory + + [default: 5] + + -l, --log-level + Specify the logging level [trace, debug, info, warn, error] + + [default: info] + + --log-dir + Specify the log directory + + [default: /var/log/dragonfly/dfget] + + --log-max-files + Specify the max number of log files + + [default: 6] + + --verbose + Specify whether to print log + + -h, --help + Print help (see a summary with '-h') + + -V, --version + Print version +``` + +## Example {#example} + +### Download with HTTP protocol {#downlad-with-http} + +```shell +dfget https://:/ -O /tmp/file.txt +``` + +### Download with S3 protocol {#downlad-with-s3} + +```shell +# Download a file. +dfget s3:/// -O /tmp/file.txt --storage-access-key-id= --storage-access-key-secret= + +# Download a directory. +dfget s3:/// -O /tmp/path/ --storage-access-key-id= --storage-access-key-secret= +``` + +### Download with GCS protocol {#downlad-with-gcs} + +```shell +# Download a file. +dfget gs:/// -O /tmp/file.txt --storage-credential-path= + +# Download a directory. +dfget gs://// -O /tmp/path/ --storage-credential-path= +``` + +### Download with ABS protocol {#downlad-with-abs} + +```shell +# Download a file. +dfget abs:/// -O /tmp/file.txt --storage-access-key-id= --storage-access-key-secret= + +# Download a directory. +dfget abs://// -O /tmp/path/ --storage-access-key-id= --storage-access-key-secret= +``` + +### Download with OSS protocol {#downlad-with-oss} + +```shell +# Download a file. +dfget oss:/// -O /tmp/file.txt --storage-access-key-id= --storage-access-key-secret= --storage-endpoint= + +# Download a directory. +dfget oss://// -O /tmp/path/ --storage-access-key-id= --storage-access-key-secret= --storage-endpoint= +``` + +### Download with OBS protocol {#downlad-with-obs} + +```shell +# Download a file. 
+dfget obs:/// -O /tmp/file.txt --storage-access-key-id= --storage-access-key-secret= --storage-endpoint= + +# Download a directory. +dfget obs://// -O /tmp/path/ --storage-access-key-id= --storage-access-key-secret= --storage-endpoint= +``` + +### Download with COS protocol {#downlad-with-cos} + +> Note: The endpoint does not require `BucketName-APPID`, just --storage-endpoint=cos.region.myqcloud.com. + +```shell +# Download a file. +dfget cos:/// -O /tmp/file.txt --storage-access-key-id= --storage-access-key-secret= --storage-endpoint= + +# Download a directory. +dfget cos://// -O /tmp/path/ --storage-access-key-id= --storage-access-key-secret= --storage-endpoint= +``` + +### Download with HDFS protocol {#downlad-with-hdfs} + +```shell +dfget hdfs:///file.txt -O /tmp/file.txt --hdfs-delegation-token +``` + + + +## Log {#log} + +```text +1. set option --verbose if you want to print logs to Terminal +2. log path: /var/log/dragonfly/dfget/ +``` diff --git a/versioned_docs/version-v2.2.0/reference/commands/manager.md b/versioned_docs/version-v2.2.0/reference/commands/manager.md new file mode 100644 index 00000000..d90cecca --- /dev/null +++ b/versioned_docs/version-v2.2.0/reference/commands/manager.md @@ -0,0 +1,51 @@ +--- +id: manager +title: Manager +slug: /reference/commands/manager/ +--- + +Manager is a process that runs in the background +and plays the role of the brain of each subsystem cluster in Dragonfly. +It is used to manage the dynamic +configuration of each system module and provide functions +such as heartbeat keeping alive, monitoring the market, and product functions. 
+ +## Usage {#usage} + +```text +manager [flags] +manager [command] +``` + +## Available Commands {#available-commands} + +```text +completion generate the autocompletion script for the specified shell +doc generate documents +help Help about any command +plugin show plugin +version show version +``` + +## Options {#options} + + + +```text + --config string the path of configuration file with yaml extension name, default is /etc/dragonfly/manager.yaml, it can also be set by env var: MANAGER_CONFIG + --console whether logger output records to the stdout +-h, --help help for manager + --jaeger string jaeger endpoint url, like: http://localhost:14250/api/traces + --pprof-port int listen port for pprof, 0 represents random port (default -1) + --service-name string name of the service for tracer (default "dragonfly-manager") + --verbose whether logger use debug level +``` + + + +## Log {#log} + +```text +1. set option --console if you want to print logs to Terminal +2. log path: /var/log/dragonfly/manager/ +``` diff --git a/versioned_docs/version-v2.2.0/reference/commands/scheduler.md b/versioned_docs/version-v2.2.0/reference/commands/scheduler.md new file mode 100644 index 00000000..3566087c --- /dev/null +++ b/versioned_docs/version-v2.2.0/reference/commands/scheduler.md @@ -0,0 +1,51 @@ +--- +id: scheduler +title: Scheduler +slug: /reference/commands/scheduler/ +--- + +Scheduler is a long-running process which receives +and manages download tasks from the client, +notifies the seed peer to return to the source, generates and maintains a +P2P network during the download process, +and pushes suitable download nodes to the client. + +## Usage {#usage} + +```text +scheduler [flags] +scheduler [command] +``` + +## Available Commands {#available-commands} + +```text +completion generate the autocompletion script for the specified shell +doc generate documents +help Help about any command +plugin show plugin +version show version +``` + +## Options {#options} + + + +```text + 
--config string the path of configuration file with yaml extension name, default is /etc/dragonfly/scheduler.yaml, it can also be set by env var: SCHEDULER_CONFIG + --console whether logger output records to the stdout + -h, --help help for scheduler + --jaeger string jaeger endpoint url, like: http://localhost:14250/api/traces + --pprof-port int listen port for pprof, 0 represents random port (default -1) + --service-name string name of the service for tracer (default "dragonfly-scheduler") + --verbose whether logger use debug level +``` + + + +## Log {#log} + +```text +1. set option --console if you want to print logs to Terminal +2. log path: /var/log/dragonfly/scheduler/ +``` diff --git a/versioned_docs/version-v2.2.0/reference/configuration/client/dfdaemon.md b/versioned_docs/version-v2.2.0/reference/configuration/client/dfdaemon.md new file mode 100644 index 00000000..8aa7504c --- /dev/null +++ b/versioned_docs/version-v2.2.0/reference/configuration/client/dfdaemon.md @@ -0,0 +1,225 @@ +--- +id: dfdaemon +title: Dfdaemon +slug: /reference/configuration/client/dfdaemon/ +--- + +## Configure Dfdaemon YAML File {#configure-dfdaemon-yaml-file} + +Configure `dfdaemon.yaml`, the default path is `/etc/dragonfly/dfdaemon.yaml`. + +```yaml +# verbose prints log to stdout. +verbose: true + +log: + # Specify the logging level [trace, debug, info, warn, error] + level: info + +# host is the host configuration for dfdaemon. +host: + # idc is the idc of the host. + idc: '' + # location is the location of the host. + location: '' +# # hostname is the hostname of the host. +# hostname: "" +# # ip is the advertise ip of the host. +# ip: "" + +server: + # pluginDir is the directory to store plugins. + pluginDir: /var/lib/dragonfly/plugins/dfdaemon/ + # cacheDir is the directory to store cache files. + cacheDir: /var/cache/dragonfly/dfdaemon/ + +download: + server: + # socketPath is the unix socket path for dfdaemon GRPC service. 
+ socketPath: /var/run/dragonfly/dfdaemon.sock + # rateLimit is the default rate limit of the download speed in KiB/MiB/GiB per second, default is 10GiB/s. + rateLimit: 10GiB + # pieceTimeout is the timeout for downloading a piece from source. + pieceTimeout: 30s + # concurrentPieceCount is the number of concurrent pieces to download. + concurrentPieceCount: 10 + +upload: + server: + # port is the port to the grpc server. + port: 4000 + # # ip is the listen ip of the grpc server. + # ip: "" + # # CA certificate file path for mTLS. + # caCert: /etc/ssl/certs/ca.crt + # # GRPC server certificate file path for mTLS. + # cert: /etc/ssl/certs/server.crt + # # GRPC server key file path for mTLS. + # key: /etc/ssl/private/server.pem + # +# # Client configuration for remote peer's upload server. +# client: +# # CA certificate file path for mTLS. +# caCert: /etc/ssl/certs/ca.crt +# # GRPC client certificate file path for mTLS. +# cert: /etc/ssl/certs/client.crt +# # GRPC client key file path for mTLS. +# key: /etc/ssl/private/client.pem + # disableShared indicates whether disable to share data for other peers. + disableShared: false + # rateLimit is the default rate limit of the upload speed in KiB/MiB/GiB per second, default is 10GiB/s. + rateLimit: 10GiB + +manager: + # addr is manager address. + addr: http://manager-service:65003 +# # CA certificate file path for mTLS. +# caCert: /etc/ssl/certs/ca.crt +# # GRPC client certificate file path for mTLS. +# cert: /etc/ssl/certs/client.crt +# # GRPC client key file path for mTLS. +# key: /etc/ssl/private/client.pem + +scheduler: + # announceInterval is the interval to announce peer to the scheduler. + # Announcer will provide the scheduler with peer information for scheduling, + # peer information includes cpu, memory, etc. + announceInterval: 1m + # scheduleTimeout is the timeout for scheduling. 
If the scheduling times out, dfdaemon will back-to-source + # download if enableBackToSource is true, otherwise dfdaemon will return download failed. + scheduleTimeout: 30s + # maxScheduleCount is the max count of schedule. + maxScheduleCount: 5 + # enableBackToSource indicates whether enable back-to-source download, when the scheduling failed. + enableBackToSource: true +# # CA certificate file path for mTLS. +# caCert: /etc/ssl/certs/ca.crt +# # GRPC client certificate file path for mTLS. +# cert: /etc/ssl/certs/client.crt +# # GRPC client key file path for mTLS. +# key: /etc/ssl/private/client.pem + +seedPeer: + # enable indicates whether enable seed peer. + enable: true + # type is the type of seed peer. + type: super + # clusterID is the cluster id of the seed peer cluster. + clusterID: 1 + # keepaliveInterval is the interval to keep alive with manager. + keepaliveInterval: 15s + +dynconfig: + # refreshInterval is the interval to refresh dynamic configuration from manager. + refreshInterval: 1m + +storage: + # dir is the directory to store task's metadata and content. + dir: /var/lib/dragonfly/ + # keep indicates whether keep the task's metadata and content when the dfdaemon restarts. + keep: true + # writeBufferSize is the buffer size for writing piece to disk, default is 128KB. + writeBufferSize: 131072 + # readBufferSize is the buffer size for reading piece from disk, default is 128KB. + readBufferSize: 131072 + +gc: + # interval is the interval to do gc. + interval: 900s + policy: + # taskTTL is the ttl of the task. + taskTTL: 21600s + # distHighThresholdPercent is the high threshold percent of the disk usage. + # If the disk usage is greater than the threshold, dfdaemon will do gc. + distHighThresholdPercent: 80 + # distLowThresholdPercent is the low threshold percent of the disk usage. + # If the disk usage is less than the threshold, dfdaemon will stop gc. + distLowThresholdPercent: 60 + +proxy: + server: + # port is the port to the proxy server. 
+ port: 4001 + # # ip is the listen ip of the proxy server. + # ip: "" + # # caCert is the root CA cert path with PEM format for the proxy server to generate the server cert. + # # If ca_cert is empty, proxy will generate a sample CA cert by rcgen::generate_simple_self_signed. + # # When client requests via the proxy, the client should not verify the server cert and set + # # insecure to true. If ca_cert is not empty, proxy will sign the server cert with the CA cert. If openssl is installed, + # # you can use openssl to generate the root CA cert and make the system trust the root CA cert. + # # Then set the ca_cert and ca_key to the root CA cert and key path. Dfdaemon generates the server cert + # # and key, and signs the server cert with the root CA cert. When client requests via the proxy, + # # the proxy can intercept the request by the server cert. + # + # caCert: "" + # # caKey is the root CA key path with PEM format for the proxy server to generate the server cert. + # # If ca_key is empty, proxy will generate a sample CA key by rcgen::generate_simple_self_signed. + # # When client requests via the proxy, the client should not verify the server cert and set + # # insecure to true. If ca_key is not empty, proxy will sign the server cert with the CA cert. If openssl is installed, + # # you can use openssl to generate the root CA cert and make the system trust the root CA cert. + # # Then set the ca_cert and ca_key to the root CA cert and key path. Dfdaemon generates the server cert + # # and key, and signs the server cert with the root CA cert. When client requests via the proxy, + # # the proxy can intercept the request by the server cert. + # + # caKey: "" + # # basic_auth is the basic auth configuration for HTTP proxy in dfdaemon. If basic_auth is not + # # empty, the proxy will use the basic auth to authenticate the client by Authorization + # # header. 
The value of the Authorization header is "Basic base64(username:password)", refer + # # to https://en.wikipedia.org/wiki/Basic_access_authentication. + # basicAuth: + # # username is the username for basic auth. + # username: "admin" + # # password is the password for basic auth. + # password: "dragonfly" + # + # rules is the list of rules for the proxy server. + # regex is the regex of the request url. + # useTLS indicates whether use tls for the proxy backend. + # redirect is the redirect url. + # filteredQueryParams is the filtered query params to generate the task id. + # When filter is ["Signature", "Expires", "ns"], for example: + # http://example.com/xyz?Expires=e1&Signature=s1&ns=docker.io and http://example.com/xyz?Expires=e2&Signature=s2&ns=docker.io + # will generate the same task id. + # Default value includes the filtered query params of s3, gcs, oss, obs, cos. + # `X-Dragonfly-Use-P2P` header can instead of the regular expression of the rule. If the value is "true", + # the request will use P2P technology to distribute the content. If the value is "false", + # but url matches the regular expression in rules. The request will also use P2P technology to distribute the content. + rules: + - regex: 'blobs/sha256.*' + # useTLS: false + # redirect: "" + # filteredQueryParams: [] + registryMirror: + # addr is the default address of the registry mirror. Proxy will start a registry mirror service for the + # client to pull the image. The client can use the default address of the registry mirror in + # configuration to pull the image. The `X-Dragonfly-Registry` header can instead of the default address + # of registry mirror. + addr: https://index.docker.io + # # cert is the client cert path with PEM format for the registry. + # # If registry use self-signed cert, the client should set the + # # cert for the registry mirror. + # cert: "" + # disableBackToSource indicates whether disable to download back-to-source when download failed. 
+ disableBackToSource: false + # prefetch pre-downloads full of the task when download with range request. + # X-Dragonfly-Prefetch priority is higher than prefetch in config. + # If the value is "true", the range request will prefetch the entire file. + # If the value is "false", the range request will fetch the range content. + prefetch: false + # prefetchRateLimit is the rate limit of the prefetch speed in KiB/MiB/GiB per second, default is 2GiB/s. + # The prefetch request has lower priority so limit the rate to avoid occupying the bandwidth impact other download tasks. + prefetchRateLimit: 2GiB + # readBufferSize is the buffer size for reading piece from disk, default is 32KB. + readBufferSize: 32768 + +metrics: + server: + # port is the port to the metrics server. + port: 4002 + # # ip is the listen ip of the metrics server. + # ip: "" +# # tracing is the tracing configuration for dfdaemon. +# tracing: +# # addr is the address to report tracing log. +# addr: "" +``` diff --git a/versioned_docs/version-v2.2.0/reference/configuration/manager.md b/versioned_docs/version-v2.2.0/reference/configuration/manager.md new file mode 100644 index 00000000..748af515 --- /dev/null +++ b/versioned_docs/version-v2.2.0/reference/configuration/manager.md @@ -0,0 +1,223 @@ +--- +id: manager +title: Manager +slug: /reference/configuration/manager/ +--- + +## Configure Manager YAML File {#configure-manager-yaml-file} + +The default path for the manager yaml configuration file is `/etc/dragonfly/manager.yaml` in linux, +and the default path is `$HOME/.dragonfly/config/manager.yaml` in darwin. + +```yaml +# Current server info used for server. +server: + # GRPC server configure. + grpc: + # # Access ip for other services, + # # when local ip is different with access ip, advertiseIP should be set. + # advertiseIP: 127.0.0.1 + # # Listen ip. + # listenIP: 0.0.0.0 + # Listen port. + # when this port is not available, manager will try next port. 
+ port: + start: 65003 + end: 65003 + # # GRPC server tls configuration. + # tls: + # # CA certificate file path for mTLS. + # caCert: /etc/ssl/certs/ca.crt + # # Certificate file path for mTLS. + # cert: /etc/ssl/certs/server.crt + # # Key file path for mTLS. + # key: /etc/ssl/private/server.pem + # REST server configure + rest: + # REST server address + addr: :8080 + # # REST server tls configuration. + # tls: + # # Certificate file path. + # cert: /etc/ssl/certs/server.crt + # # Key file path. + # key: /etc/ssl/private/server.pem + # WorkHome is working directory. + # In linux, default value is /usr/local/dragonfly. + # In macos(just for testing), default value is /Users/$USER/.dragonfly. + workHome: '' + # logDir is the log directory. + # In linux, default value is /var/log/dragonfly. + # In macos(just for testing), default value is /Users/$USER/.dragonfly/logs. + logDir: '' + # cacheDir is dynconfig cache directory. + # In linux, default value is /var/cache/dragonfly. + # In macos(just for testing), default value is /Users/$USER/.dragonfly/cache. + cacheDir: '' + # pluginDir is the plugin directory. + # In linux, default value is /usr/local/dragonfly/plugins. + # In macos(just for testing), default value is /Users/$USER/.dragonfly/plugins. + pluginDir: '' + +# Auth configuration. +auth: + # JWT configuration used for signing. + jwt: + # Realm name to display to the user, default value is Dragonfly. + realm: 'Dragonfly' + # Key is secret key used for signing, default value is + # encoded base64 of dragonfly. + # Please change the key in production. + key: 'ZHJhZ29uZmx5Cg==' + # Timeout is duration that a jwt token is valid, + # default duration is two days. + timeout: 48h + # MaxRefresh field allows clients to refresh their token + # until MaxRefresh has passed, default duration is two days. + maxRefresh: 48h + +# Database info used for server. +database: + # Database type, supported types include mysql, mariadb and postgres. + type: mysql + # Mysql configure. 
+ mysql: + user: dragonfly + password: dragonfly + host: dragonfly + port: 3306 + dbname: manager + migrate: true + # Postgres configure. + postgres: + user: dragonfly + password: dragonfly + host: dragonfly + port: 5432 + dbname: manager + sslMode: disable + timezone: UTC + migrate: true + # tlsConfig: preferred + # tls: + # # Client certificate file path. + # cert: /etc/ssl/certs/cert.pem + # # Client key file path. + # key: /etc/ssl/private/key.pem + # # CA file path. + # ca: /etc/ssl/certs/ca.pem + # # Whether a client verifies the server's certificate chain and hostname. + # insecureSkipVerify: true + # Redis configure. + redis: + # Redis addresses. + addrs: + - dragonfly:6379 + # Redis sentinel master name. + masterName: '' + # Redis username. + username: '' + # Redis password. + password: '' + # Redis DB. + db: 0 + # Redis broker DB. + brokerDB: 1 + # Redis backend DB. + backendDB: 2 + +# Manager server cache. +cache: + # Redis cache configure. + redis: + # Cache ttl configure. + ttl: 5m + # Local cache configure. + local: + # LFU cache size. + size: 30000 + # Cache ttl configure. + ttl: 3m + +# Job configuration. +job: + # rateLimit configuration. + rateLimit: + # fillInterval is the interval for refilling the bucket. + fillInterval: 1m + # capacity is the maximum number of requests that can be consumed in a single fillInterval. + capacity: 5 + # quantum is the number of tokens taken from the bucket for each request. + quantum: 5 + # gc configuration. + gc: + # Interval is the interval for garbage collection. + interval: 3h + # TTL is the time to live for the job. + ttl: 6h + # Sync peers configuration. + syncPeers: + # Interval is the interval for syncing all peers information from the scheduler and + # display peers information in the manager console. + interval: 24h + # Timeout is the timeout for syncing peers information from the single scheduler. + timeout: 10m + # Preheat configuration. 
+ preheat: + # registryTimeout is the timeout for requesting registry to get token and manifest. + registryTimeout: 1m + tls: + # insecureSkipVerify controls whether a client verifies the server's certificate chain and hostname. + insecureSkipVerify: false + # # caCert is the CA certificate for preheat tls handshake, it can be path or PEM format string. + # caCert: '' + +# Object storage service. +objectStorage: + # Enable object storage. + enable: false + # Name is object storage name of type, it can be s3, oss or obs. + name: s3 + # Region is storage region. + region: '' + # Endpoint is datacenter endpoint. + endpoint: '' + # AccessKey is access key ID. + accessKey: '' + # SecretKey is access key secret. + secretKey: '' + # s3ForcePathStyle sets force path style for s3, true by default. + # Set this to `true` to force the request to use path-style addressing, + # i.e., `http://s3.amazonaws.com/BUCKET/KEY`. By default, the S3 client + # will use virtual hosted bucket addressing when possible + # (`http://BUCKET.s3.amazonaws.com/KEY`). + # Refer to https://github.com/aws/aws-sdk-go/blob/main/aws/config.go#L118. + s3ForcePathStyle: true + +# Prometheus metrics. +metrics: + # Manager enable metrics service. + enable: true + # Metrics service address. + addr: ':8000' + # Enable peer gauge metrics. + enablePeerGauge: true + +# Network configuration. +network: + # Enable ipv6. + enableIPv6: false + +# Console shows log on console. +console: true + +# Whether to enable debug level logger and enable pprof. +verbose: true + +# Listen port for pprof, only valid when the verbose option is true +# default is -1. If it is 0, pprof will use a random port. +pprof-port: -1 + +# Jaeger endpoint url, like: http://jaeger.dragonfly.svc:14268/api/traces. 
+jaeger: '' +``` diff --git a/versioned_docs/version-v2.2.0/reference/configuration/scheduler.md b/versioned_docs/version-v2.2.0/reference/configuration/scheduler.md new file mode 100644 index 00000000..ce2df308 --- /dev/null +++ b/versioned_docs/version-v2.2.0/reference/configuration/scheduler.md @@ -0,0 +1,195 @@ +--- +id: scheduler +title: Scheduler +slug: /reference/configuration/scheduler/ +--- + +## Configure Scheduler YAML File {#configure-scheduler-yaml-file} + +The default path for the scheduler yaml configuration file is `/etc/dragonfly/scheduler.yaml` in linux, +and the default path is `$HOME/.dragonfly/config/scheduler.yaml` in darwin. + +```yaml +# Server scheduler instance configuration. +server: +# # Access ip for other services, +# # when local ip is different with access ip, advertiseIP should be set. +# advertiseIP: 127.0.0.1 +# # Access port for other services, +# # when local ip is different with access port, advertisePort should be set. +# advertisePort: 8002 +# # Listen ip. +# listenIP: 0.0.0.0 +# Port is the ip and port scheduler server listens on. + port: 8002 +# # GRPC server tls configuration. +# tls: +# # CA certificate file path for mTLS. +# caCert: /etc/ssl/certs/ca.crt +# # Certificate file path for mTLS. +# cert: /etc/ssl/certs/server.crt +# # Key file path for mTLS. +# key: /etc/ssl/private/server.pem +# # Server host. +# host: localhost + # WorkHome is working directory. + # In linux, default value is /usr/local/dragonfly. + # In macos(just for testing), default value is /Users/$USER/.dragonfly. + workHome: '' + # logDir is the log directory. + # In linux, default value is /var/log/dragonfly. + # In macos(just for testing), default value is /Users/$USER/.dragonfly/logs. + logDir: '' + # cacheDir is dynconfig cache directory. + # In linux, default value is /var/cache/dragonfly. + # In macos(just for testing), default value is /Users/$USER/.dragonfly/cache. + cacheDir: '' + # pluginDir is the plugin directory. 
+ # In linux, default value is /usr/local/dragonfly/plugins. + # In macos(just for testing), default value is /Users/$USER/.dragonfly/plugins. + pluginDir: '' + # dataDir is the directory. + # In linux, default value is /var/lib/dragonfly. + # In macos(just for testing), default value is /Users/$USER/.dragonfly/data. + dataDir: '' + +# Scheduler policy configuration. +scheduler: + # Algorithm configuration to use different scheduling algorithms, + # default configuration supports "default" and "ml" + # "default" is the rule-based scheduling algorithm, + # "ml" is the machine learning scheduling algorithm + # It also supports user plugin extension, the algorithm value is "plugin", + # and the compiled `d7y-scheduler-plugin-evaluator.so` file is added to + # the dragonfly working directory plugins. + algorithm: default + # backToSourceCount is single task allows the peer to back-to-source count. + backToSourceCount: 200 + # retryBackToSourceLimit reaches the limit, then the peer back-to-source. + retryBackToSourceLimit: 3 + # Retry scheduling limit times. + retryLimit: 5 + # Retry scheduling interval. + retryInterval: 400ms + # GC metadata configuration. + gc: + # pieceDownloadTimeout is the timeout of downloading piece. + pieceDownloadTimeout: 30m + # peerGCInterval is the interval of peer gc. + peerGCInterval: 10s + # peerTTL is the ttl of peer. If the peer has been downloaded by other peers, + # then PeerTTL will be reset + peerTTL: 24h + # taskGCInterval is the interval of task gc. If all the peers have been reclaimed in the task, + # then the task will also be reclaimed. + taskGCInterval: 30m + # hostGCInterval is the interval of host gc. + hostGCInterval: 6h + # hostTTL is time to live of host. If host announces message to scheduler, + # then HostTTL will be reset. + hostTTL: 1h + +# Database info used for server. +database: + # Redis configuration. + redis: + # Redis addresses. + addrs: + - redis-service:6379 + # Redis sentinel master name. 
+ masterName: '' + # Redis username. + username: '' + # Redis password. + password: '' + # Redis broker DB. + brokerDB: 1 + # Redis backend DB. + backendDB: 2 + +# Dynamic data configuration. +dynConfig: + # Dynamic config refresh interval. + refreshInterval: 1m + +# Scheduler host configuration. +host: + # idc is the idc of scheduler instance. + idc: '' + # location is the location of scheduler instance. + location: '' + +# Manager configuration. +manager: + # addr is manager access address. + addr: manager-service:65003 + # schedulerClusterID cluster id to which scheduler instance belongs. + schedulerClusterID: 1 + # keepAlive keep alive configuration. + keepAlive: + # KeepAlive interval. + interval: 5s +# # GRPC client tls configuration. +# tls: +# # CA certificate file path for mTLS. +# caCert: /etc/ssl/certs/ca.crt +# # Certificate file path for mTLS. +# cert: /etc/ssl/certs/client.crt +# # Key file path for mTLS. +# key: /etc/ssl/private/client.pem + +# Seed peer configuration. +seedPeer: + # Scheduler enable seed peer as P2P peer, + # if the value is false, P2P network will not be back-to-source through + # seed peer but by peer and preheat feature does not work. + enable: true + +# Machinery async job configuration, +# see https://github.com/RichardKnop/machinery. +job: + # Scheduler enable job service. + enable: true + # Number of workers in global queue. + globalWorkerNum: 500 + # Number of workers in scheduler queue. + schedulerWorkerNum: 500 + # Number of workers in local queue. + localWorkerNum: 1000 + +# Store task download information. +storage: + # maxSize sets the maximum size in megabytes of storage file. + maxSize: 100 + # maxBackups sets the maximum number of storage files to retain. + maxBackups: 10 + # bufferSize sets the size of buffer container, + # if the buffer is full, write all the records in the buffer to the file. + bufferSize: 100 + +# Enable prometheus metrics. +metrics: + # Scheduler enable metrics service. 
+ enable: true + # Metrics service address. + addr: ':8000' + # Enable host metrics. + enableHost: false + +network: + # Enable ipv6. + enableIPv6: false + +# Console shows log on console. +console: true + +# Whether to enable debug level logger and enable pprof. +verbose: true + +# Listen port for pprof, only valid when the verbose option is true +# default is -1. If it is 0, pprof will use a random port. +pprof-port: -1 + +# Jaeger endpoint url, like: http://jaeger.dragonfly.svc:14268/api/traces. +jaeger: '' +``` diff --git a/versioned_docs/version-v2.2.0/resource/advanced-guides/personal-access-tokens/add-token-to-open-api.png b/versioned_docs/version-v2.2.0/resource/advanced-guides/personal-access-tokens/add-token-to-open-api.png new file mode 100644 index 00000000..6f343f10 Binary files /dev/null and b/versioned_docs/version-v2.2.0/resource/advanced-guides/personal-access-tokens/add-token-to-open-api.png differ diff --git a/versioned_docs/version-v2.2.0/resource/advanced-guides/personal-access-tokens/copy-token.png b/versioned_docs/version-v2.2.0/resource/advanced-guides/personal-access-tokens/copy-token.png new file mode 100644 index 00000000..db585c69 Binary files /dev/null and b/versioned_docs/version-v2.2.0/resource/advanced-guides/personal-access-tokens/copy-token.png differ diff --git a/versioned_docs/version-v2.2.0/resource/advanced-guides/personal-access-tokens/create-token.png b/versioned_docs/version-v2.2.0/resource/advanced-guides/personal-access-tokens/create-token.png new file mode 100644 index 00000000..02e53fe9 Binary files /dev/null and b/versioned_docs/version-v2.2.0/resource/advanced-guides/personal-access-tokens/create-token.png differ diff --git a/versioned_docs/version-v2.2.0/resource/advanced-guides/personal-access-tokens/delete-token.png b/versioned_docs/version-v2.2.0/resource/advanced-guides/personal-access-tokens/delete-token.png new file mode 100644 index 00000000..32d21cd9 Binary files /dev/null and 
b/versioned_docs/version-v2.2.0/resource/advanced-guides/personal-access-tokens/delete-token.png differ diff --git a/versioned_docs/version-v2.2.0/resource/advanced-guides/personal-access-tokens/tokens.png b/versioned_docs/version-v2.2.0/resource/advanced-guides/personal-access-tokens/tokens.png new file mode 100644 index 00000000..4bb2456f Binary files /dev/null and b/versioned_docs/version-v2.2.0/resource/advanced-guides/personal-access-tokens/tokens.png differ diff --git a/versioned_docs/version-v2.2.0/resource/advanced-guides/personal-access-tokens/update-token.png b/versioned_docs/version-v2.2.0/resource/advanced-guides/personal-access-tokens/update-token.png new file mode 100644 index 00000000..83dea2ef Binary files /dev/null and b/versioned_docs/version-v2.2.0/resource/advanced-guides/personal-access-tokens/update-token.png differ diff --git a/versioned_docs/version-v2.2.0/resource/advanced-guides/personal-access-tokens/verify-headers.png b/versioned_docs/version-v2.2.0/resource/advanced-guides/personal-access-tokens/verify-headers.png new file mode 100644 index 00000000..b13c5de6 Binary files /dev/null and b/versioned_docs/version-v2.2.0/resource/advanced-guides/personal-access-tokens/verify-headers.png differ diff --git a/versioned_docs/version-v2.2.0/resource/advanced-guides/personal-access-tokens/verify-request.png b/versioned_docs/version-v2.2.0/resource/advanced-guides/personal-access-tokens/verify-request.png new file mode 100644 index 00000000..e688476b Binary files /dev/null and b/versioned_docs/version-v2.2.0/resource/advanced-guides/personal-access-tokens/verify-request.png differ diff --git a/versioned_docs/version-v2.2.0/resource/advanced-guides/preheat/create-instance.png b/versioned_docs/version-v2.2.0/resource/advanced-guides/preheat/create-instance.png new file mode 100644 index 00000000..16c1640f Binary files /dev/null and b/versioned_docs/version-v2.2.0/resource/advanced-guides/preheat/create-instance.png differ diff --git 
a/versioned_docs/version-v2.2.0/resource/advanced-guides/preheat/create-policy.png b/versioned_docs/version-v2.2.0/resource/advanced-guides/preheat/create-policy.png new file mode 100644 index 00000000..77a05bc7 Binary files /dev/null and b/versioned_docs/version-v2.2.0/resource/advanced-guides/preheat/create-policy.png differ diff --git a/versioned_docs/version-v2.2.0/resource/advanced-guides/preheat/create-preheat.png b/versioned_docs/version-v2.2.0/resource/advanced-guides/preheat/create-preheat.png new file mode 100644 index 00000000..b5f1ebc7 Binary files /dev/null and b/versioned_docs/version-v2.2.0/resource/advanced-guides/preheat/create-preheat.png differ diff --git a/versioned_docs/version-v2.2.0/resource/advanced-guides/preheat/create-token.png b/versioned_docs/version-v2.2.0/resource/advanced-guides/preheat/create-token.png new file mode 100644 index 00000000..3bb63d19 Binary files /dev/null and b/versioned_docs/version-v2.2.0/resource/advanced-guides/preheat/create-token.png differ diff --git a/versioned_docs/version-v2.2.0/resource/advanced-guides/preheat/exectu-preheat.png b/versioned_docs/version-v2.2.0/resource/advanced-guides/preheat/exectu-preheat.png new file mode 100644 index 00000000..7b1a21a6 Binary files /dev/null and b/versioned_docs/version-v2.2.0/resource/advanced-guides/preheat/exectu-preheat.png differ diff --git a/versioned_docs/version-v2.2.0/resource/advanced-guides/preheat/executions-success.png b/versioned_docs/version-v2.2.0/resource/advanced-guides/preheat/executions-success.png new file mode 100644 index 00000000..2154ca1d Binary files /dev/null and b/versioned_docs/version-v2.2.0/resource/advanced-guides/preheat/executions-success.png differ diff --git a/versioned_docs/version-v2.2.0/resource/advanced-guides/preheat/executions.png b/versioned_docs/version-v2.2.0/resource/advanced-guides/preheat/executions.png new file mode 100644 index 00000000..45d1ad85 Binary files /dev/null and 
b/versioned_docs/version-v2.2.0/resource/advanced-guides/preheat/executions.png differ diff --git a/versioned_docs/version-v2.2.0/resource/advanced-guides/preheat/failure-preheat.png b/versioned_docs/version-v2.2.0/resource/advanced-guides/preheat/failure-preheat.png new file mode 100644 index 00000000..63f9f166 Binary files /dev/null and b/versioned_docs/version-v2.2.0/resource/advanced-guides/preheat/failure-preheat.png differ diff --git a/versioned_docs/version-v2.2.0/resource/advanced-guides/preheat/instance.png b/versioned_docs/version-v2.2.0/resource/advanced-guides/preheat/instance.png new file mode 100644 index 00000000..a5b2f094 Binary files /dev/null and b/versioned_docs/version-v2.2.0/resource/advanced-guides/preheat/instance.png differ diff --git a/versioned_docs/version-v2.2.0/resource/advanced-guides/preheat/log.png b/versioned_docs/version-v2.2.0/resource/advanced-guides/preheat/log.png new file mode 100644 index 00000000..f9baa0a2 Binary files /dev/null and b/versioned_docs/version-v2.2.0/resource/advanced-guides/preheat/log.png differ diff --git a/versioned_docs/version-v2.2.0/resource/advanced-guides/preheat/open-api-token.png b/versioned_docs/version-v2.2.0/resource/advanced-guides/preheat/open-api-token.png new file mode 100644 index 00000000..26f379d1 Binary files /dev/null and b/versioned_docs/version-v2.2.0/resource/advanced-guides/preheat/open-api-token.png differ diff --git a/versioned_docs/version-v2.2.0/resource/advanced-guides/preheat/p2p-preheat.png b/versioned_docs/version-v2.2.0/resource/advanced-guides/preheat/p2p-preheat.png new file mode 100644 index 00000000..ea03ea05 Binary files /dev/null and b/versioned_docs/version-v2.2.0/resource/advanced-guides/preheat/p2p-preheat.png differ diff --git a/versioned_docs/version-v2.2.0/resource/advanced-guides/preheat/pending-preheat.png b/versioned_docs/version-v2.2.0/resource/advanced-guides/preheat/pending-preheat.png new file mode 100644 index 00000000..0a927acf Binary files /dev/null and 
b/versioned_docs/version-v2.2.0/resource/advanced-guides/preheat/pending-preheat.png differ diff --git a/versioned_docs/version-v2.2.0/resource/advanced-guides/preheat/preheats.png b/versioned_docs/version-v2.2.0/resource/advanced-guides/preheat/preheats.png new file mode 100644 index 00000000..7733c051 Binary files /dev/null and b/versioned_docs/version-v2.2.0/resource/advanced-guides/preheat/preheats.png differ diff --git a/versioned_docs/version-v2.2.0/resource/advanced-guides/preheat/success-preheat.png b/versioned_docs/version-v2.2.0/resource/advanced-guides/preheat/success-preheat.png new file mode 100644 index 00000000..c5892a09 Binary files /dev/null and b/versioned_docs/version-v2.2.0/resource/advanced-guides/preheat/success-preheat.png differ diff --git a/versioned_docs/version-v2.2.0/resource/advanced-guides/task/create-token.png b/versioned_docs/version-v2.2.0/resource/advanced-guides/task/create-token.png new file mode 100644 index 00000000..c7361031 Binary files /dev/null and b/versioned_docs/version-v2.2.0/resource/advanced-guides/task/create-token.png differ diff --git a/versioned_docs/version-v2.2.0/resource/advanced-guides/task/delete-task.png b/versioned_docs/version-v2.2.0/resource/advanced-guides/task/delete-task.png new file mode 100644 index 00000000..c908506f Binary files /dev/null and b/versioned_docs/version-v2.2.0/resource/advanced-guides/task/delete-task.png differ diff --git a/versioned_docs/version-v2.2.0/resource/advanced-guides/task/error-log.png b/versioned_docs/version-v2.2.0/resource/advanced-guides/task/error-log.png new file mode 100644 index 00000000..3b398619 Binary files /dev/null and b/versioned_docs/version-v2.2.0/resource/advanced-guides/task/error-log.png differ diff --git a/versioned_docs/version-v2.2.0/resource/advanced-guides/task/executions.png b/versioned_docs/version-v2.2.0/resource/advanced-guides/task/executions.png new file mode 100644 index 00000000..d88f8a86 Binary files /dev/null and 
b/versioned_docs/version-v2.2.0/resource/advanced-guides/task/executions.png differ diff --git a/versioned_docs/version-v2.2.0/resource/advanced-guides/task/failure-task.png b/versioned_docs/version-v2.2.0/resource/advanced-guides/task/failure-task.png new file mode 100644 index 00000000..e6b467b3 Binary files /dev/null and b/versioned_docs/version-v2.2.0/resource/advanced-guides/task/failure-task.png differ diff --git a/versioned_docs/version-v2.2.0/resource/advanced-guides/task/pending-task.png b/versioned_docs/version-v2.2.0/resource/advanced-guides/task/pending-task.png new file mode 100644 index 00000000..ba6fcc1e Binary files /dev/null and b/versioned_docs/version-v2.2.0/resource/advanced-guides/task/pending-task.png differ diff --git a/versioned_docs/version-v2.2.0/resource/advanced-guides/task/search-task-by-task-id.png b/versioned_docs/version-v2.2.0/resource/advanced-guides/task/search-task-by-task-id.png new file mode 100644 index 00000000..6bc389ce Binary files /dev/null and b/versioned_docs/version-v2.2.0/resource/advanced-guides/task/search-task-by-task-id.png differ diff --git a/versioned_docs/version-v2.2.0/resource/advanced-guides/task/search-task-by-url.png b/versioned_docs/version-v2.2.0/resource/advanced-guides/task/search-task-by-url.png new file mode 100644 index 00000000..adb18ba7 Binary files /dev/null and b/versioned_docs/version-v2.2.0/resource/advanced-guides/task/search-task-by-url.png differ diff --git a/versioned_docs/version-v2.2.0/resource/advanced-guides/task/success-task.png b/versioned_docs/version-v2.2.0/resource/advanced-guides/task/success-task.png new file mode 100644 index 00000000..13f38ea9 Binary files /dev/null and b/versioned_docs/version-v2.2.0/resource/advanced-guides/task/success-task.png differ diff --git a/versioned_docs/version-v2.2.0/resource/advanced-guides/web-console/cluster/cluster.png b/versioned_docs/version-v2.2.0/resource/advanced-guides/web-console/cluster/cluster.png new file mode 100644 index 
00000000..98827f70 Binary files /dev/null and b/versioned_docs/version-v2.2.0/resource/advanced-guides/web-console/cluster/cluster.png differ diff --git a/versioned_docs/version-v2.2.0/resource/advanced-guides/web-console/cluster/clusters.png b/versioned_docs/version-v2.2.0/resource/advanced-guides/web-console/cluster/clusters.png new file mode 100644 index 00000000..f7899b12 Binary files /dev/null and b/versioned_docs/version-v2.2.0/resource/advanced-guides/web-console/cluster/clusters.png differ diff --git a/versioned_docs/version-v2.2.0/resource/advanced-guides/web-console/cluster/create-cluster.png b/versioned_docs/version-v2.2.0/resource/advanced-guides/web-console/cluster/create-cluster.png new file mode 100644 index 00000000..68b6d50c Binary files /dev/null and b/versioned_docs/version-v2.2.0/resource/advanced-guides/web-console/cluster/create-cluster.png differ diff --git a/versioned_docs/version-v2.2.0/resource/advanced-guides/web-console/cluster/delete-cluster.png b/versioned_docs/version-v2.2.0/resource/advanced-guides/web-console/cluster/delete-cluster.png new file mode 100644 index 00000000..74f56b3e Binary files /dev/null and b/versioned_docs/version-v2.2.0/resource/advanced-guides/web-console/cluster/delete-cluster.png differ diff --git a/versioned_docs/version-v2.2.0/resource/advanced-guides/web-console/cluster/delete-inactive-scheduler.png b/versioned_docs/version-v2.2.0/resource/advanced-guides/web-console/cluster/delete-inactive-scheduler.png new file mode 100644 index 00000000..d7004849 Binary files /dev/null and b/versioned_docs/version-v2.2.0/resource/advanced-guides/web-console/cluster/delete-inactive-scheduler.png differ diff --git a/versioned_docs/version-v2.2.0/resource/advanced-guides/web-console/cluster/delete-inactive-seed-peer.png b/versioned_docs/version-v2.2.0/resource/advanced-guides/web-console/cluster/delete-inactive-seed-peer.png new file mode 100644 index 00000000..3bc65319 Binary files /dev/null and 
b/versioned_docs/version-v2.2.0/resource/advanced-guides/web-console/cluster/delete-inactive-seed-peer.png differ diff --git a/versioned_docs/version-v2.2.0/resource/advanced-guides/web-console/cluster/delete-scheduler.png b/versioned_docs/version-v2.2.0/resource/advanced-guides/web-console/cluster/delete-scheduler.png new file mode 100644 index 00000000..8a5e733b Binary files /dev/null and b/versioned_docs/version-v2.2.0/resource/advanced-guides/web-console/cluster/delete-scheduler.png differ diff --git a/versioned_docs/version-v2.2.0/resource/advanced-guides/web-console/cluster/delete-seed-peer.png b/versioned_docs/version-v2.2.0/resource/advanced-guides/web-console/cluster/delete-seed-peer.png new file mode 100644 index 00000000..f4a44e1a Binary files /dev/null and b/versioned_docs/version-v2.2.0/resource/advanced-guides/web-console/cluster/delete-seed-peer.png differ diff --git a/versioned_docs/version-v2.2.0/resource/advanced-guides/web-console/cluster/export-peer.png b/versioned_docs/version-v2.2.0/resource/advanced-guides/web-console/cluster/export-peer.png new file mode 100644 index 00000000..57ff295f Binary files /dev/null and b/versioned_docs/version-v2.2.0/resource/advanced-guides/web-console/cluster/export-peer.png differ diff --git a/versioned_docs/version-v2.2.0/resource/advanced-guides/web-console/cluster/peers.png b/versioned_docs/version-v2.2.0/resource/advanced-guides/web-console/cluster/peers.png new file mode 100644 index 00000000..859d1f4c Binary files /dev/null and b/versioned_docs/version-v2.2.0/resource/advanced-guides/web-console/cluster/peers.png differ diff --git a/versioned_docs/version-v2.2.0/resource/advanced-guides/web-console/cluster/refresh-peer.png b/versioned_docs/version-v2.2.0/resource/advanced-guides/web-console/cluster/refresh-peer.png new file mode 100644 index 00000000..f826b922 Binary files /dev/null and b/versioned_docs/version-v2.2.0/resource/advanced-guides/web-console/cluster/refresh-peer.png differ diff --git 
a/versioned_docs/version-v2.2.0/resource/advanced-guides/web-console/cluster/scheduler.png b/versioned_docs/version-v2.2.0/resource/advanced-guides/web-console/cluster/scheduler.png new file mode 100644 index 00000000..4fde076b Binary files /dev/null and b/versioned_docs/version-v2.2.0/resource/advanced-guides/web-console/cluster/scheduler.png differ diff --git a/versioned_docs/version-v2.2.0/resource/advanced-guides/web-console/cluster/schedulers.png b/versioned_docs/version-v2.2.0/resource/advanced-guides/web-console/cluster/schedulers.png new file mode 100644 index 00000000..6eeedeb5 Binary files /dev/null and b/versioned_docs/version-v2.2.0/resource/advanced-guides/web-console/cluster/schedulers.png differ diff --git a/versioned_docs/version-v2.2.0/resource/advanced-guides/web-console/cluster/seed-peer.png b/versioned_docs/version-v2.2.0/resource/advanced-guides/web-console/cluster/seed-peer.png new file mode 100644 index 00000000..e1e67132 Binary files /dev/null and b/versioned_docs/version-v2.2.0/resource/advanced-guides/web-console/cluster/seed-peer.png differ diff --git a/versioned_docs/version-v2.2.0/resource/advanced-guides/web-console/cluster/seed-peers.png b/versioned_docs/version-v2.2.0/resource/advanced-guides/web-console/cluster/seed-peers.png new file mode 100644 index 00000000..5c40bc1b Binary files /dev/null and b/versioned_docs/version-v2.2.0/resource/advanced-guides/web-console/cluster/seed-peers.png differ diff --git a/versioned_docs/version-v2.2.0/resource/advanced-guides/web-console/cluster/update-cluster.png b/versioned_docs/version-v2.2.0/resource/advanced-guides/web-console/cluster/update-cluster.png new file mode 100644 index 00000000..985c8036 Binary files /dev/null and b/versioned_docs/version-v2.2.0/resource/advanced-guides/web-console/cluster/update-cluster.png differ diff --git a/versioned_docs/version-v2.2.0/resource/advanced-guides/web-console/developer/personal-access-tokens/create-token.png 
b/versioned_docs/version-v2.2.0/resource/advanced-guides/web-console/developer/personal-access-tokens/create-token.png new file mode 100644 index 00000000..6dedb6dd Binary files /dev/null and b/versioned_docs/version-v2.2.0/resource/advanced-guides/web-console/developer/personal-access-tokens/create-token.png differ diff --git a/versioned_docs/version-v2.2.0/resource/advanced-guides/web-console/developer/personal-access-tokens/delete-token.png b/versioned_docs/version-v2.2.0/resource/advanced-guides/web-console/developer/personal-access-tokens/delete-token.png new file mode 100644 index 00000000..f6872ef4 Binary files /dev/null and b/versioned_docs/version-v2.2.0/resource/advanced-guides/web-console/developer/personal-access-tokens/delete-token.png differ diff --git a/versioned_docs/version-v2.2.0/resource/advanced-guides/web-console/developer/personal-access-tokens/tokens.png b/versioned_docs/version-v2.2.0/resource/advanced-guides/web-console/developer/personal-access-tokens/tokens.png new file mode 100644 index 00000000..4bb2456f Binary files /dev/null and b/versioned_docs/version-v2.2.0/resource/advanced-guides/web-console/developer/personal-access-tokens/tokens.png differ diff --git a/versioned_docs/version-v2.2.0/resource/advanced-guides/web-console/developer/personal-access-tokens/update-token.png b/versioned_docs/version-v2.2.0/resource/advanced-guides/web-console/developer/personal-access-tokens/update-token.png new file mode 100644 index 00000000..83dea2ef Binary files /dev/null and b/versioned_docs/version-v2.2.0/resource/advanced-guides/web-console/developer/personal-access-tokens/update-token.png differ diff --git a/versioned_docs/version-v2.2.0/resource/advanced-guides/web-console/insight/peer/export-peer.png b/versioned_docs/version-v2.2.0/resource/advanced-guides/web-console/insight/peer/export-peer.png new file mode 100644 index 00000000..90b8cbdf Binary files /dev/null and 
b/versioned_docs/version-v2.2.0/resource/advanced-guides/web-console/insight/peer/export-peer.png differ diff --git a/versioned_docs/version-v2.2.0/resource/advanced-guides/web-console/insight/peer/peers.png b/versioned_docs/version-v2.2.0/resource/advanced-guides/web-console/insight/peer/peers.png new file mode 100644 index 00000000..fe436e1e Binary files /dev/null and b/versioned_docs/version-v2.2.0/resource/advanced-guides/web-console/insight/peer/peers.png differ diff --git a/versioned_docs/version-v2.2.0/resource/advanced-guides/web-console/login/signin.png b/versioned_docs/version-v2.2.0/resource/advanced-guides/web-console/login/signin.png new file mode 100644 index 00000000..59411d2f Binary files /dev/null and b/versioned_docs/version-v2.2.0/resource/advanced-guides/web-console/login/signin.png differ diff --git a/versioned_docs/version-v2.2.0/resource/advanced-guides/web-console/login/signup.png b/versioned_docs/version-v2.2.0/resource/advanced-guides/web-console/login/signup.png new file mode 100644 index 00000000..3f829825 Binary files /dev/null and b/versioned_docs/version-v2.2.0/resource/advanced-guides/web-console/login/signup.png differ diff --git a/versioned_docs/version-v2.2.0/resource/advanced-guides/web-console/user/change-password.png b/versioned_docs/version-v2.2.0/resource/advanced-guides/web-console/user/change-password.png new file mode 100644 index 00000000..8fa81eaf Binary files /dev/null and b/versioned_docs/version-v2.2.0/resource/advanced-guides/web-console/user/change-password.png differ diff --git a/versioned_docs/version-v2.2.0/resource/advanced-guides/web-console/user/profile.png b/versioned_docs/version-v2.2.0/resource/advanced-guides/web-console/user/profile.png new file mode 100644 index 00000000..f25f70e2 Binary files /dev/null and b/versioned_docs/version-v2.2.0/resource/advanced-guides/web-console/user/profile.png differ diff --git a/versioned_docs/version-v2.2.0/resource/advanced-guides/web-console/user/update-profile.png 
b/versioned_docs/version-v2.2.0/resource/advanced-guides/web-console/user/update-profile.png new file mode 100644 index 00000000..a22aba08 Binary files /dev/null and b/versioned_docs/version-v2.2.0/resource/advanced-guides/web-console/user/update-profile.png differ diff --git a/versioned_docs/version-v2.2.0/resource/advanced-guides/web-console/user/update-user-role.png b/versioned_docs/version-v2.2.0/resource/advanced-guides/web-console/user/update-user-role.png new file mode 100644 index 00000000..194389a2 Binary files /dev/null and b/versioned_docs/version-v2.2.0/resource/advanced-guides/web-console/user/update-user-role.png differ diff --git a/versioned_docs/version-v2.2.0/resource/advanced-guides/web-console/user/user.png b/versioned_docs/version-v2.2.0/resource/advanced-guides/web-console/user/user.png new file mode 100644 index 00000000..d13b24cd Binary files /dev/null and b/versioned_docs/version-v2.2.0/resource/advanced-guides/web-console/user/user.png differ diff --git a/versioned_docs/version-v2.2.0/resource/advanced-guides/web-console/user/users.png b/versioned_docs/version-v2.2.0/resource/advanced-guides/web-console/user/users.png new file mode 100644 index 00000000..2a8d7775 Binary files /dev/null and b/versioned_docs/version-v2.2.0/resource/advanced-guides/web-console/user/users.png differ diff --git a/versioned_docs/version-v2.2.0/resource/architecture/manage-multiple-p2p-networks.png b/versioned_docs/version-v2.2.0/resource/architecture/manage-multiple-p2p-networks.png new file mode 100644 index 00000000..a0a87297 Binary files /dev/null and b/versioned_docs/version-v2.2.0/resource/architecture/manage-multiple-p2p-networks.png differ diff --git a/versioned_docs/version-v2.2.0/resource/architecture/scheduler-dag.png b/versioned_docs/version-v2.2.0/resource/architecture/scheduler-dag.png new file mode 100644 index 00000000..7e1f6c32 Binary files /dev/null and b/versioned_docs/version-v2.2.0/resource/architecture/scheduler-dag.png differ diff --git 
a/versioned_docs/version-v2.2.0/resource/architecture/scheduler-state-machine.jpg b/versioned_docs/version-v2.2.0/resource/architecture/scheduler-state-machine.jpg new file mode 100644 index 00000000..99d9708a Binary files /dev/null and b/versioned_docs/version-v2.2.0/resource/architecture/scheduler-state-machine.jpg differ diff --git a/versioned_docs/version-v2.2.0/resource/concepts/arch.png b/versioned_docs/version-v2.2.0/resource/concepts/arch.png new file mode 100644 index 00000000..57f9617b Binary files /dev/null and b/versioned_docs/version-v2.2.0/resource/concepts/arch.png differ diff --git a/versioned_docs/version-v2.2.0/resource/concepts/dfcache-delete.jpg b/versioned_docs/version-v2.2.0/resource/concepts/dfcache-delete.jpg new file mode 100644 index 00000000..eca3aad0 Binary files /dev/null and b/versioned_docs/version-v2.2.0/resource/concepts/dfcache-delete.jpg differ diff --git a/versioned_docs/version-v2.2.0/resource/concepts/dfcache-export.jpg b/versioned_docs/version-v2.2.0/resource/concepts/dfcache-export.jpg new file mode 100644 index 00000000..74d9171d Binary files /dev/null and b/versioned_docs/version-v2.2.0/resource/concepts/dfcache-export.jpg differ diff --git a/versioned_docs/version-v2.2.0/resource/concepts/dfcache-import.jpg b/versioned_docs/version-v2.2.0/resource/concepts/dfcache-import.jpg new file mode 100644 index 00000000..bfe61a40 Binary files /dev/null and b/versioned_docs/version-v2.2.0/resource/concepts/dfcache-import.jpg differ diff --git a/versioned_docs/version-v2.2.0/resource/concepts/dfcache-stat.jpg b/versioned_docs/version-v2.2.0/resource/concepts/dfcache-stat.jpg new file mode 100644 index 00000000..e085cb6a Binary files /dev/null and b/versioned_docs/version-v2.2.0/resource/concepts/dfcache-stat.jpg differ diff --git a/versioned_docs/version-v2.2.0/resource/concepts/dfstore-get-object-hit-backend.jpg b/versioned_docs/version-v2.2.0/resource/concepts/dfstore-get-object-hit-backend.jpg new file mode 100644 index 
00000000..5a5f6649 Binary files /dev/null and b/versioned_docs/version-v2.2.0/resource/concepts/dfstore-get-object-hit-backend.jpg differ diff --git a/versioned_docs/version-v2.2.0/resource/concepts/dfstore-get-object-hit-other-peer.jpg b/versioned_docs/version-v2.2.0/resource/concepts/dfstore-get-object-hit-other-peer.jpg new file mode 100644 index 00000000..62b7dc8f Binary files /dev/null and b/versioned_docs/version-v2.2.0/resource/concepts/dfstore-get-object-hit-other-peer.jpg differ diff --git a/versioned_docs/version-v2.2.0/resource/concepts/dfstore-get-object-hit-peer.jpg b/versioned_docs/version-v2.2.0/resource/concepts/dfstore-get-object-hit-peer.jpg new file mode 100644 index 00000000..3e3cf90c Binary files /dev/null and b/versioned_docs/version-v2.2.0/resource/concepts/dfstore-get-object-hit-peer.jpg differ diff --git a/versioned_docs/version-v2.2.0/resource/concepts/dfstore-get-object.png b/versioned_docs/version-v2.2.0/resource/concepts/dfstore-get-object.png new file mode 100644 index 00000000..2e066582 Binary files /dev/null and b/versioned_docs/version-v2.2.0/resource/concepts/dfstore-get-object.png differ diff --git a/versioned_docs/version-v2.2.0/resource/concepts/dfstore-put-object-async.jpg b/versioned_docs/version-v2.2.0/resource/concepts/dfstore-put-object-async.jpg new file mode 100644 index 00000000..62dc6a2d Binary files /dev/null and b/versioned_docs/version-v2.2.0/resource/concepts/dfstore-put-object-async.jpg differ diff --git a/versioned_docs/version-v2.2.0/resource/concepts/dfstore-put-object-sync.jpg b/versioned_docs/version-v2.2.0/resource/concepts/dfstore-put-object-sync.jpg new file mode 100644 index 00000000..5c39b9bb Binary files /dev/null and b/versioned_docs/version-v2.2.0/resource/concepts/dfstore-put-object-sync.jpg differ diff --git a/versioned_docs/version-v2.2.0/resource/concepts/dfstore-put-object.png b/versioned_docs/version-v2.2.0/resource/concepts/dfstore-put-object.png new file mode 100644 index 00000000..587c3f3c 
Binary files /dev/null and b/versioned_docs/version-v2.2.0/resource/concepts/dfstore-put-object.png differ diff --git a/versioned_docs/version-v2.2.0/resource/getting-started/cluster-a.png b/versioned_docs/version-v2.2.0/resource/getting-started/cluster-a.png new file mode 100644 index 00000000..3e1d97f8 Binary files /dev/null and b/versioned_docs/version-v2.2.0/resource/getting-started/cluster-a.png differ diff --git a/versioned_docs/version-v2.2.0/resource/getting-started/cluster-b-information.png b/versioned_docs/version-v2.2.0/resource/getting-started/cluster-b-information.png new file mode 100644 index 00000000..6fcca1b8 Binary files /dev/null and b/versioned_docs/version-v2.2.0/resource/getting-started/cluster-b-information.png differ diff --git a/versioned_docs/version-v2.2.0/resource/getting-started/clusters.png b/versioned_docs/version-v2.2.0/resource/getting-started/clusters.png new file mode 100644 index 00000000..0900a8c3 Binary files /dev/null and b/versioned_docs/version-v2.2.0/resource/getting-started/clusters.png differ diff --git a/versioned_docs/version-v2.2.0/resource/getting-started/create-cluster-b-successfully.png b/versioned_docs/version-v2.2.0/resource/getting-started/create-cluster-b-successfully.png new file mode 100644 index 00000000..f5d9aac8 Binary files /dev/null and b/versioned_docs/version-v2.2.0/resource/getting-started/create-cluster-b-successfully.png differ diff --git a/versioned_docs/version-v2.2.0/resource/getting-started/create-cluster-b.png b/versioned_docs/version-v2.2.0/resource/getting-started/create-cluster-b.png new file mode 100644 index 00000000..6881e86c Binary files /dev/null and b/versioned_docs/version-v2.2.0/resource/getting-started/create-cluster-b.png differ diff --git a/versioned_docs/version-v2.2.0/resource/getting-started/install-cluster-b-successfully.png b/versioned_docs/version-v2.2.0/resource/getting-started/install-cluster-b-successfully.png new file mode 100644 index 00000000..a924ae25 Binary files 
/dev/null and b/versioned_docs/version-v2.2.0/resource/getting-started/install-cluster-b-successfully.png differ diff --git a/versioned_docs/version-v2.2.0/resource/getting-started/installation/manager-console.png b/versioned_docs/version-v2.2.0/resource/getting-started/installation/manager-console.png new file mode 100644 index 00000000..f6de8cbb Binary files /dev/null and b/versioned_docs/version-v2.2.0/resource/getting-started/installation/manager-console.png differ diff --git a/versioned_docs/version-v2.2.0/resource/getting-started/multi-cluster-kubernetes.png b/versioned_docs/version-v2.2.0/resource/getting-started/multi-cluster-kubernetes.png new file mode 100644 index 00000000..e1ea64a0 Binary files /dev/null and b/versioned_docs/version-v2.2.0/resource/getting-started/multi-cluster-kubernetes.png differ diff --git a/versioned_docs/version-v2.2.0/resource/getting-started/sequence-diagram.png b/versioned_docs/version-v2.2.0/resource/getting-started/sequence-diagram.png new file mode 100644 index 00000000..9e63cf6e Binary files /dev/null and b/versioned_docs/version-v2.2.0/resource/getting-started/sequence-diagram.png differ diff --git a/versioned_docs/version-v2.2.0/resource/getting-started/signin.png b/versioned_docs/version-v2.2.0/resource/getting-started/signin.png new file mode 100644 index 00000000..59411d2f Binary files /dev/null and b/versioned_docs/version-v2.2.0/resource/getting-started/signin.png differ diff --git a/versioned_docs/version-v2.2.0/resource/introduction/features.jpeg b/versioned_docs/version-v2.2.0/resource/introduction/features.jpeg new file mode 100644 index 00000000..59cca6da Binary files /dev/null and b/versioned_docs/version-v2.2.0/resource/introduction/features.jpeg differ diff --git a/versioned_docs/version-v2.2.0/resource/introduction/milestone.jpeg b/versioned_docs/version-v2.2.0/resource/introduction/milestone.jpeg new file mode 100644 index 00000000..b9c168b8 Binary files /dev/null and 
b/versioned_docs/version-v2.2.0/resource/introduction/milestone.jpeg differ diff --git a/versioned_docs/version-v2.2.0/resource/operations/best-practices/deployment-best-practices/cluster.png b/versioned_docs/version-v2.2.0/resource/operations/best-practices/deployment-best-practices/cluster.png new file mode 100644 index 00000000..5bb5fc3a Binary files /dev/null and b/versioned_docs/version-v2.2.0/resource/operations/best-practices/deployment-best-practices/cluster.png differ diff --git a/versioned_docs/version-v2.2.0/resource/operations/best-practices/deployment-best-practices/update-cluster.png b/versioned_docs/version-v2.2.0/resource/operations/best-practices/deployment-best-practices/update-cluster.png new file mode 100644 index 00000000..2ead0a1b Binary files /dev/null and b/versioned_docs/version-v2.2.0/resource/operations/best-practices/deployment-best-practices/update-cluster.png differ diff --git a/versioned_docs/version-v2.2.0/resource/operations/best-practices/observability/monitoring/grafana-import-dashboard.jpg b/versioned_docs/version-v2.2.0/resource/operations/best-practices/observability/monitoring/grafana-import-dashboard.jpg new file mode 100644 index 00000000..0e848b73 Binary files /dev/null and b/versioned_docs/version-v2.2.0/resource/operations/best-practices/observability/monitoring/grafana-import-dashboard.jpg differ diff --git a/versioned_docs/version-v2.2.0/resource/operations/best-practices/observability/monitoring/grafana-login.jpg b/versioned_docs/version-v2.2.0/resource/operations/best-practices/observability/monitoring/grafana-login.jpg new file mode 100644 index 00000000..d880e249 Binary files /dev/null and b/versioned_docs/version-v2.2.0/resource/operations/best-practices/observability/monitoring/grafana-login.jpg differ diff --git a/versioned_docs/version-v2.2.0/resource/operations/best-practices/observability/monitoring/grafana-manager.jpg 
b/versioned_docs/version-v2.2.0/resource/operations/best-practices/observability/monitoring/grafana-manager.jpg new file mode 100644 index 00000000..7b80d152 Binary files /dev/null and b/versioned_docs/version-v2.2.0/resource/operations/best-practices/observability/monitoring/grafana-manager.jpg differ diff --git a/versioned_docs/version-v2.2.0/resource/operations/best-practices/observability/monitoring/grafana-peer.jpg b/versioned_docs/version-v2.2.0/resource/operations/best-practices/observability/monitoring/grafana-peer.jpg new file mode 100644 index 00000000..b60c5b82 Binary files /dev/null and b/versioned_docs/version-v2.2.0/resource/operations/best-practices/observability/monitoring/grafana-peer.jpg differ diff --git a/versioned_docs/version-v2.2.0/resource/operations/best-practices/observability/monitoring/grafana-scheduler.jpg b/versioned_docs/version-v2.2.0/resource/operations/best-practices/observability/monitoring/grafana-scheduler.jpg new file mode 100644 index 00000000..176f7774 Binary files /dev/null and b/versioned_docs/version-v2.2.0/resource/operations/best-practices/observability/monitoring/grafana-scheduler.jpg differ diff --git a/versioned_docs/version-v2.2.0/resource/operations/best-practices/observability/monitoring/grafana-validate-metrics.jpg b/versioned_docs/version-v2.2.0/resource/operations/best-practices/observability/monitoring/grafana-validate-metrics.jpg new file mode 100644 index 00000000..7d0ce82c Binary files /dev/null and b/versioned_docs/version-v2.2.0/resource/operations/best-practices/observability/monitoring/grafana-validate-metrics.jpg differ diff --git a/versioned_docs/version-v2.2.0/resource/operations/best-practices/security/external-attackers.png b/versioned_docs/version-v2.2.0/resource/operations/best-practices/security/external-attackers.png new file mode 100644 index 00000000..b4c2356f Binary files /dev/null and b/versioned_docs/version-v2.2.0/resource/operations/best-practices/security/external-attackers.png differ 
diff --git a/versioned_docs/version-v2.2.0/resource/operations/best-practices/security/internal-attackers.png b/versioned_docs/version-v2.2.0/resource/operations/best-practices/security/internal-attackers.png new file mode 100644 index 00000000..3bc5944a Binary files /dev/null and b/versioned_docs/version-v2.2.0/resource/operations/best-practices/security/internal-attackers.png differ diff --git a/versioned_docs/version-v2.2.0/resource/operations/integrations/git-lfs-download.png b/versioned_docs/version-v2.2.0/resource/operations/integrations/git-lfs-download.png new file mode 100644 index 00000000..15897594 Binary files /dev/null and b/versioned_docs/version-v2.2.0/resource/operations/integrations/git-lfs-download.png differ diff --git a/versioned_docs/version-v2.2.0/resource/operations/integrations/git-lfs-dragonfly.png b/versioned_docs/version-v2.2.0/resource/operations/integrations/git-lfs-dragonfly.png new file mode 100644 index 00000000..fb9e5ce4 Binary files /dev/null and b/versioned_docs/version-v2.2.0/resource/operations/integrations/git-lfs-dragonfly.png differ diff --git a/versioned_docs/version-v2.2.0/resource/operations/integrations/git-lfs-p2p.png b/versioned_docs/version-v2.2.0/resource/operations/integrations/git-lfs-p2p.png new file mode 100644 index 00000000..88bdc1a0 Binary files /dev/null and b/versioned_docs/version-v2.2.0/resource/operations/integrations/git-lfs-p2p.png differ diff --git a/versioned_docs/version-v2.2.0/resource/operations/integrations/git-lfs.png b/versioned_docs/version-v2.2.0/resource/operations/integrations/git-lfs.png new file mode 100644 index 00000000..59e56c7e Binary files /dev/null and b/versioned_docs/version-v2.2.0/resource/operations/integrations/git-lfs.png differ diff --git a/versioned_docs/version-v2.2.0/resource/operations/integrations/hugging-face-download.png b/versioned_docs/version-v2.2.0/resource/operations/integrations/hugging-face-download.png new file mode 100644 index 00000000..aaceada9 Binary files 
/dev/null and b/versioned_docs/version-v2.2.0/resource/operations/integrations/hugging-face-download.png differ diff --git a/versioned_docs/version-v2.2.0/resource/operations/integrations/hugging-face-dragonfly.png b/versioned_docs/version-v2.2.0/resource/operations/integrations/hugging-face-dragonfly.png new file mode 100644 index 00000000..72b63fea Binary files /dev/null and b/versioned_docs/version-v2.2.0/resource/operations/integrations/hugging-face-dragonfly.png differ diff --git a/versioned_docs/version-v2.2.0/resource/operations/integrations/hugging-face-p2p.png b/versioned_docs/version-v2.2.0/resource/operations/integrations/hugging-face-p2p.png new file mode 100644 index 00000000..e9a65940 Binary files /dev/null and b/versioned_docs/version-v2.2.0/resource/operations/integrations/hugging-face-p2p.png differ diff --git a/versioned_docs/version-v2.2.0/resource/operations/integrations/nydus-mirror-dragonfly.png b/versioned_docs/version-v2.2.0/resource/operations/integrations/nydus-mirror-dragonfly.png new file mode 100644 index 00000000..04992a8e Binary files /dev/null and b/versioned_docs/version-v2.2.0/resource/operations/integrations/nydus-mirror-dragonfly.png differ diff --git a/versioned_docs/version-v2.2.0/resource/operations/integrations/stargz-mirror-dragonfly.png b/versioned_docs/version-v2.2.0/resource/operations/integrations/stargz-mirror-dragonfly.png new file mode 100644 index 00000000..6d9c56cc Binary files /dev/null and b/versioned_docs/version-v2.2.0/resource/operations/integrations/stargz-mirror-dragonfly.png differ diff --git a/versioned_docs/version-v2.2.0/resource/operations/integrations/torchserve-download.png b/versioned_docs/version-v2.2.0/resource/operations/integrations/torchserve-download.png new file mode 100644 index 00000000..247067da Binary files /dev/null and b/versioned_docs/version-v2.2.0/resource/operations/integrations/torchserve-download.png differ diff --git 
a/versioned_docs/version-v2.2.0/resource/operations/integrations/torchserve-dragonfly-architecture.png b/versioned_docs/version-v2.2.0/resource/operations/integrations/torchserve-dragonfly-architecture.png new file mode 100644 index 00000000..bf5dcf8f Binary files /dev/null and b/versioned_docs/version-v2.2.0/resource/operations/integrations/torchserve-dragonfly-architecture.png differ diff --git a/versioned_docs/version-v2.2.0/resource/operations/integrations/torchserve-dragonfly-integration.png b/versioned_docs/version-v2.2.0/resource/operations/integrations/torchserve-dragonfly-integration.png new file mode 100644 index 00000000..edf02d64 Binary files /dev/null and b/versioned_docs/version-v2.2.0/resource/operations/integrations/torchserve-dragonfly-integration.png differ diff --git a/versioned_docs/version-v2.2.0/resource/operations/integrations/torchserve-dragonfly.png b/versioned_docs/version-v2.2.0/resource/operations/integrations/torchserve-dragonfly.png new file mode 100644 index 00000000..dd98efd2 Binary files /dev/null and b/versioned_docs/version-v2.2.0/resource/operations/integrations/torchserve-dragonfly.png differ diff --git a/versioned_docs/version-v2.2.0/resource/operations/integrations/torchserve-p2p.png b/versioned_docs/version-v2.2.0/resource/operations/integrations/torchserve-p2p.png new file mode 100644 index 00000000..6ccd47a7 Binary files /dev/null and b/versioned_docs/version-v2.2.0/resource/operations/integrations/torchserve-p2p.png differ diff --git a/versioned_docs/version-v2.2.0/resource/operations/integrations/triton-server-download.png b/versioned_docs/version-v2.2.0/resource/operations/integrations/triton-server-download.png new file mode 100644 index 00000000..56b04102 Binary files /dev/null and b/versioned_docs/version-v2.2.0/resource/operations/integrations/triton-server-download.png differ diff --git a/versioned_docs/version-v2.2.0/resource/operations/integrations/triton-server-dragonfly.png 
b/versioned_docs/version-v2.2.0/resource/operations/integrations/triton-server-dragonfly.png new file mode 100644 index 00000000..522a7289 Binary files /dev/null and b/versioned_docs/version-v2.2.0/resource/operations/integrations/triton-server-dragonfly.png differ diff --git a/versioned_docs/version-v2.2.0/resource/operations/integrations/triton-server-p2p.png b/versioned_docs/version-v2.2.0/resource/operations/integrations/triton-server-p2p.png new file mode 100644 index 00000000..3902b676 Binary files /dev/null and b/versioned_docs/version-v2.2.0/resource/operations/integrations/triton-server-p2p.png differ diff --git a/versioned_docs/version-v2.2.0/roadmap/v2.0.md b/versioned_docs/version-v2.2.0/roadmap/v2.0.md new file mode 100644 index 00000000..7c882dc8 --- /dev/null +++ b/versioned_docs/version-v2.2.0/roadmap/v2.0.md @@ -0,0 +1,43 @@ +--- +id: roadmap-v2.0 +title: v2.0 +slug: /roadmap-v2.0/ +--- + +Manager: + +- Console + - Refactor project. + - Optimize permissions management. + - Optimize console UI, page visual revision. + - Add actions such as lint and test. + - Add development documentation. + - Add dynamic configurations. + - Add P2P data visualization. +- Provides initial installation page. +- Optimized calculations to match scheduler cluster rules. +- Common configurations are managed dynamically through the manager. +- Application-level speed limit and other configurations. +- Added open api interface authentication. + +Scheduler: + +- Improve scheduling stability and collect metrics during scheduling. +- Scheduler integrates machine learning algorithms to improve scheduling capabilities. +- Allocate download peers based on peer bandwidth traffic. + +Client: + +- Support seed peer feature. +- Improve task download efficiency and stability. +- Refactoring to use GRPC bidirectional stream for piece information passing between peers. +- Support piece download priority. + +Document: + +- Refactored d7y.io website and added dragonfly 2.0 documentation. 
+ +Others: + +- Provide performance testing solutions in perf-tests repo. +- Upgrade golang 1.18, refactor the project using the generic feature. diff --git a/versioned_docs/version-v2.2.0/roadmap/v2.1.md b/versioned_docs/version-v2.2.0/roadmap/v2.1.md new file mode 100644 index 00000000..c18de399 --- /dev/null +++ b/versioned_docs/version-v2.2.0/roadmap/v2.1.md @@ -0,0 +1,42 @@ +--- +id: roadmap-v2.1 +title: v2.1 +slug: /roadmap-v2.1/ +--- + +Manager: + +- Console [v1.0.0](https://github.com/dragonflyoss/console/tree/release-1.0.0) is released and it provides + a new console for users to operate Dragonfly. +- Provides the ability to control the features of the scheduler in the manager. If the scheduler preheat feature is + not in feature flags, then it will stop providing the preheating in the scheduler. +- Add personal access tokens feature in the manager and personal access token + contains your security credentials for the restful open api. +- Add TLS config to manager rest server. +- Add cluster in the manager and the cluster contains a scheduler cluster and a seed peer cluster. +- Use unscoped delete when destroying the manager's resources. +- Add uk_scheduler index and uk_seed_peer index in the table of the database. +- Remove security domain feature and security feature in the manager. +- Add advertise port config. + +Scheduler: + +- Add network topology feature and it can probe the network latency between peers, providing better scheduling capabilities. +- Scheduler adds database field in config and moves the redis config to database field. +- Fix filtering and evaluation in scheduling. Since the final length of the filter is + the candidateParentLimit used, the parents after the filter is wrong. +- Fix storage can not write records to file when bufferSize is zero. +- Add advertise port config. +- Fix fsm changes state failed when register task. 
+ +Client: + +- Dfstore adds GetObjectMetadatas and CopyObject to supports using Dragonfly as the JuiceFS backend. +- Fix dfdaemon fails to start when there is no available scheduler address. +- Fix object downloads failed by dfstore when dfdaemon enabled concurrent. +- Replace net.Dial with grpc health check in dfdaemon. + +Others: + +- A third party security audit was performed by Trail of Bits, you can see the full report [here](https://github.com/dragonflyoss/dragonfly/blob/main/docs/security/dragonfly-comprehensive-report-2023.pdf). +- Hiding sensitive information in logs, such as the token in the header. diff --git a/versioned_docs/version-v2.2.0/roadmap/v2.2.md b/versioned_docs/version-v2.2.0/roadmap/v2.2.md new file mode 100644 index 00000000..aca040c6 --- /dev/null +++ b/versioned_docs/version-v2.2.0/roadmap/v2.2.md @@ -0,0 +1,38 @@ +--- +id: roadmap-v2.2 +title: v2.2 +slug: /roadmap-v2.2/ +--- + +Manager: + +- Add clearing P2P task cache. +- Peer information display, including CPU, Memory, etc. + +Scheduler: + +- Optimize scheduling algorithm and improve bandwidth utilization in the P2P network. + +Client: + +- Client written in Rust, reduce CPU usage and Memory usage. + +Others: + +- Defines the V2 of the P2P transfer protocol. + +Document: + +- Restructure the document to make it easier for users to use. +- Enhance the landing page UI. + +AI Infrastructure: + +- Supports Triton Inference Server to accelerate model distribution, refer to [dragonfly-repository-agent](../operations/integrations/triton-server.md). +- Supports TorchServer to accelerate model distribution, refer to [document](../operations/integrations/torchserve.md). +- Supports HuggingFace to accelerate model distribution and dataset distribution, refer to [document](../operations/integrations/hugging-face.md). +- Supports Git LFS to accelerate file distribution, refer to [document](../operations/integrations/git-lfs.md). 
+- Supports JuiceFS to accelerate file downloads from object storage, JuiceFS read requests via + peer proxy and write requests via the default client of object storage. +- Supports Fluid to accelerate model distribution. +- Support AI infrastructure to efficiently distribute models and datasets, and integrated with the AI ecosystem. diff --git a/versioned_docs/version-v2.2.0/roadmap/v2.3.md b/versioned_docs/version-v2.2.0/roadmap/v2.3.md new file mode 100644 index 00000000..30e9e9a7 --- /dev/null +++ b/versioned_docs/version-v2.2.0/roadmap/v2.3.md @@ -0,0 +1,43 @@ +--- +id: roadmap-v2.3 +title: v2.3 +slug: /roadmap-v2.3/ +--- + +Manager: + +- Configure scheduling weights. +- Support scopes for Personal Access Tokens (PATs). +- Regularly clean up inactive schedulers and seed peers. +- Display more Peer information in the console, such as CPU and memory usage. +- Display persistent cache information of peers in the console. +- Add management of sync peers in the console. + +Scheduler: + +- Optimize the scheduling algorithm to improve bandwidth utilization in the P2P network. + +Client: + +- Support RDMA/QUIC for faster network transmission in the P2P network, enhancing the loading of + AI inference models into memory. +- Define a codable protocol for data transmission, providing faster encoding/decoding. +- Support persistent cache, allowing access within the P2P cluster without uploading to other storage, + facilitating faster read/write of AI models and datasets. +- Allow peers to get the QoS of parents and select the optimal parents for downloading. +- Preheat files in the memory cache to improve download speed. + +Others: + +- Add more performance tests in the dfbench command. +- Add more E2E tests and unit tests. + +Documentation: + +- Restructure the documentation to make it easier for users to navigate. +- Enhance the landing page UI. + +AI Infrastructure: + +- Optimize large file distribution within the infrastructure. 
+- Optimize handling of a large number of small I/Os for Nydus. diff --git a/versioned_docs/version-v2.2.0/roadmap/v2.4.md b/versioned_docs/version-v2.2.0/roadmap/v2.4.md new file mode 100644 index 00000000..c66a18d3 --- /dev/null +++ b/versioned_docs/version-v2.2.0/roadmap/v2.4.md @@ -0,0 +1,36 @@ +--- +id: roadmap-v2.4 +title: v2.4 +slug: /roadmap-v2.4/ +--- + +Manager + +- Optimize memory and CPU usage. +- Add more features to the console. +- Provide more open APIs for the console. + +Scheduler + +- Optimize the scheduling algorithm to improve bandwidth utilization in the P2P network. + +Client + +- Support P2P for RDMA-based memory storage. +- Add distributed addressing, allowing deployment without relying on the manager and scheduler. +- Optimize file transfer speed in the P2P network. + +Others + +- Add more performance tests in the `dfbench` command. +- Add more E2E tests and unit tests. + +Documentation + +- Restructure the documentation to make it easier for users to navigate. +- Enhance the landing page UI. + +AI Infrastructure + +- Optimize large file distribution within the infrastructure. +- Optimize handling of a large number of small I/Os for Nydus. 
diff --git a/versioned_sidebars/version-v2.2.0-sidebars.json b/versioned_sidebars/version-v2.2.0-sidebars.json new file mode 100644 index 00000000..8d1e25c4 --- /dev/null +++ b/versioned_sidebars/version-v2.2.0-sidebars.json @@ -0,0 +1,219 @@ +{ + "docs": [ + { + "type": "doc", + "id": "introduction" + }, + { + "type": "category", + "label": "Getting Started", + "items": [ + { + "type": "category", + "label": "Quick Start", + "link": { + "type": "doc", + "id": "getting-started/quick-start" + }, + "items": [ + { + "type": "autogenerated", + "dirName": "getting-started/quick-start" + } + ] + }, + { + "type": "category", + "label": "Installation", + "items": ["getting-started/installation/helm-charts", "getting-started/installation/binaries"] + } + ] + }, + { + "type": "category", + "label": "Operations", + "items": [ + { + "type": "category", + "label": "Best Practices", + "items": [ + "operations/best-practices/deployment-best-practices", + { + "type": "category", + "label": "Observability", + "items": [ + { + "type": "autogenerated", + "dirName": "operations/best-practices/observability" + } + ] + }, + { + "type": "category", + "label": "Security", + "items": [ + { + "type": "autogenerated", + "dirName": "operations/best-practices/security" + } + ] + } + ] + }, + { + "type": "category", + "label": "Deployment", + "items": [ + "operations/deployment/architecture", + { + "type": "category", + "label": "Applications", + "items": [ + "operations/deployment/applications/manager", + "operations/deployment/applications/scheduler", + "operations/deployment/applications/client" + ] + } + ] + }, + { + "type": "category", + "label": "Integrations", + "items": [ + { + "type": "category", + "label": "Container Runtime", + "items": [ + "operations/integrations/container-runtime/containerd", + "operations/integrations/container-runtime/singularity", + "operations/integrations/container-runtime/docker", + "operations/integrations/container-runtime/cri-o", + 
"operations/integrations/container-runtime/podman", + "operations/integrations/container-runtime/nydus", + "operations/integrations/container-runtime/stargz" + ] + }, + "operations/integrations/harbor", + "operations/integrations/hugging-face", + "operations/integrations/git-lfs", + "operations/integrations/torchserve", + "operations/integrations/triton-server", + "operations/integrations/upgrade" + ] + } + ] + }, + { + "type": "category", + "label": "Reference", + "items": [ + { + "type": "category", + "label": "Configuration", + "items": [ + "reference/configuration/manager", + "reference/configuration/scheduler", + { + "type": "category", + "label": "Client", + "items": [ + { + "type": "autogenerated", + "dirName": "reference/configuration/client" + } + ] + } + ] + }, + { + "type": "category", + "label": "Commands", + "items": [ + "reference/commands/manager", + "reference/commands/scheduler", + { + "type": "category", + "label": "Client", + "items": [ + { + "type": "autogenerated", + "dirName": "reference/commands/client" + } + ] + } + ] + } + ] + }, + { + "type": "category", + "label": "Advanced Guides", + "items": [ + { + "type": "category", + "label": "Web Console", + "link": { + "type": "doc", + "id": "advanced-guides/web-console" + }, + "items": [ + "advanced-guides/web-console/signin", + "advanced-guides/web-console/signup", + "advanced-guides/web-console/cluster", + { + "type": "category", + "label": "Developer", + "items": [ + { + "type": "autogenerated", + "dirName": "advanced-guides/web-console/developer" + } + ] + }, + { + "type": "category", + "label": "Job", + "items": [ + { + "type": "autogenerated", + "dirName": "advanced-guides/web-console/job" + } + ] + }, + "advanced-guides/web-console/user" + ] + }, + "advanced-guides/preheat", + "advanced-guides/task-manager", + "advanced-guides/personal-access-tokens" + ] + }, + { + "type": "category", + "label": "Development Guides", + "items": [ + "development-guide/configure-development-environment", + { 
+ "type": "category", + "label": "Plugins", + "items": ["development-guide/plugins/out-of-tree-plugins", "development-guide/plugins/in-tree-plugin"] + }, + "development-guide/running-tests" + ] + }, + { + "type": "category", + "label": "Roadmap", + "items": [ + { + "type": "autogenerated", + "dirName": "roadmap" + } + ] + }, + { + "type": "doc", + "id": "faq" + } + ] +} diff --git a/versions.json b/versions.json index 8653076c..db60db72 100644 --- a/versions.json +++ b/versions.json @@ -1 +1 @@ -["v2.1.x", "v2.1.0", "v2.0.9", "v2.0.8", "v2.0.7", "v2.0.6", "v2.0.5", "v2.0.4", "v2.0.3", "v2.0.2"] +["v2.2.0", "v2.1.x", "v2.1.0", "v2.0.9", "v2.0.8", "v2.0.7", "v2.0.6", "v2.0.5", "v2.0.4", "v2.0.3", "v2.0.2"]