Modernize instructlab deploys to v0.18 and beyond (#8831)
tonykay authored Nov 15, 2024
1 parent 3fce44b commit 8eb2b72
Showing 8 changed files with 306 additions and 0 deletions.
60 changes: 60 additions & 0 deletions ansible/roles/ai_setup_instructlab/README.adoc
@@ -0,0 +1,60 @@
= setup_instructlab

This role installs InstructLab on a RHEL or Fedora machine.
Currently it is opinionated and assumes that the machine has an NVIDIA GPU and CUDA.

It currently supports:

- RHEL 9 (tested against RHEL 9.3)
- Fedora (tested against Fedora 39)

In addition it installs a small number of prerequisites (e.g. `gcc`) that are required for the installation of the NVIDIA drivers and the CUDA Toolkit.

== Role Variables

This role is entirely self-contained, i.e. it is _fire and forget_, and does not require any variables to be set.

However, the following link:./defaults/main.yml[variables] can be set to control the installation:

[source,yaml]
----
setup_instructlab_repo_url: "https://github.com/instructlab/instructlab"
setup_instructlab_taxonomy_repo_url: "https://github.com/instructlab/taxonomy"
setup_instructlab_user: "instruct"
setup_instructlab_install_path_base: "/home/{{ setup_instructlab_user }}" # Change this to your path
setup_instructlab_cuda_home: "/usr/local/cuda"
setup_instructlab_cuda_lib_path: "/usr/local/cuda/lib64:/usr/local/cuda/extras/CUPTI/lib64"
setup_instructlab_developer_packages:
  - g++
  - gcc
  - git
  - make
  - python3.11
----
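
These defaults can be overridden wherever the role is applied, for example with `vars:` on the play or role entry. A minimal sketch (the user name and install path shown here are illustrative assumptions, not values required by the role):

[source,yaml]
----
- name: Setup InstructLab for a custom user
  hosts: localhost
  become: true
  roles:
    - role: setup_instructlab
      vars:
        setup_instructlab_user: "devuser"                    # assumed example user
        setup_instructlab_install_path_base: "/opt/devuser"  # assumed example path
----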

== Dependencies

None

== Example Playbook

An example playbook that applies the role:

[source,yaml]
----
---
- name: Setup InstructLab
  hosts: localhost
  gather_facts: true
  become: true
  roles:
    - setup_instructlab
----
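
Assuming the play above is saved as `setup_instructlab.yml` (the filename and inventory are assumptions, not requirements), it can be run directly, or limited to part of the role using the tags defined in link:./tasks/main.yml[tasks/main.yml]:

[source,sh]
----
ansible-playbook -i inventory setup_instructlab.yml

# Re-run only the repository clone step
ansible-playbook -i inventory setup_instructlab.yml --tags setup-instructlab-repos
----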

== Author Information

Tony Kay ([email protected]) 2024-05-01
38 changes: 38 additions & 0 deletions ansible/roles/ai_setup_instructlab/defaults/main.yml
@@ -0,0 +1,38 @@
---

setup_instructlab_developer_preview: false

# NOTE: Currently this var may be highly changeable and should be managed in AgV

setup_instructlab_llama_cpp_python_version: "0.2.79"
setup_instructlab_git_ref: "stable"

# Sets up the instance for fine-tuning, needs significant GPU resources

setup_instructlab_summit2024_mode: false

setup_instructlab_repo_url: "https://github.com/instructlab/instructlab"
setup_instructlab_taxonomy_repo_url: "https://github.com/instructlab/taxonomy"
setup_instructlab_git_tag: main

# TODO: Make this distribution agnostic

setup_instructlab_user: "instruct"

# InstructLab homes

setup_instructlab_home: "{{ setup_instructlab_install_path_base }}/instructlab"
setup_instructlab_install_path_base: "/home/{{ setup_instructlab_user }}" # Change this to your path

setup_instructlab_cuda_home: "/usr/local/cuda"
setup_instructlab_cuda_lib_path: "/usr/local/cuda/lib64"

setup_instructlab_python_version: "3.11"

setup_instructlab_developer_packages:
  - g++
  - gcc
  - git
  - make
  - "python{{ setup_instructlab_python_version }}"
  - "python{{ setup_instructlab_python_version }}-devel"
60 changes: 60 additions & 0 deletions ansible/roles/ai_setup_instructlab/files/qna.yaml
@@ -0,0 +1,60 @@
created_by: instructlab-team
domain: instructlab
seed_examples:
  - answer: InstructLab is a model-agnostic open source AI project that facilitates
      contributions to Large Language Models (LLMs).
      We are on a mission to let anyone shape generative AI by enabling contributed
      updates to existing LLMs in an accessible way. Our community welcomes all those who
      would like to help us enable everyone to shape the future of generative AI.
    question: What is InstructLab?
  - answer: Check out the Instructlab Community README to get started
      with using and contributing to the project.
      If you want to jump right in, head to the InstructLab CLI
      documentation to get InstructLab set up and running.
      Learn more about the skills and knowledge you can add to models.
      You may wish to read through the project's FAQ to get more familiar
      with all aspects of InstructLab. You can find all the ways to
      collaborate with project maintainers and your fellow users of
      InstructLab beyond GitHub by visiting our project collaboration page.
    question: How to get started with InstructLab
  - answer: There are many projects rapidly embracing and extending
      permissively licensed AI models, but they are faced with three
      main challenges. Contribution to LLMs is not possible directly.
      They show up as forks, which forces consumers to choose a “best-fit”
      model that is not easily extensible. Also, the forks are expensive
      for model creators to maintain. The ability to contribute ideas is
      limited by a lack of AI/ML expertise. One has to learn how to fork,
      train, and refine models to see their idea move forward.
      This is a high barrier to entry. There is no direct community
      governance or best practice around review, curation, and
      distribution of forked models.
    question: What problems is Instructlab aiming to solve?
  - answer: InstructLab was created by Red Hat and IBM Research.
    question: Who created Instructlab?
  - answer: The project enables community contributors to add
      additional "skills" or "knowledge" to a particular model. InstructLab's
      model-agnostic technology gives model upstreams with sufficient
      infrastructure resources the ability to create regular builds of
      their open source licensed models not by rebuilding and retraining
      the entire model but by composing new skills into it.
      The community welcomes all those who would like to help enable
      everyone to shape the future of generative AI.
    question: How does Instructlab enable community collaboration?
  - answer: Yes, InstructLab is a model-agnostic open source AI project
      that facilitates contributions to Large Language Models (LLMs).
    question: Is Instructlab an open source project?
  - answer: InstructLab uses a novel synthetic data-based alignment
      tuning method for Large Language Models (LLMs).
      The "lab" in InstructLab stands for Large-Scale Alignment for ChatBots.
    question: What is the tuning method for Instructlab?
  - answer: The mission of instructlab is to let everyone shape generative AI
      by enabling contributed updates to existing LLMs in an accessible way.
      The community welcomes all those who would like to help enable everyone
      to shape the future of generative AI.
    question: What is the mission of Instructlab?
task_description: 'Details on instructlab community project'
document:
  repo: https://github.com/instructlab/.github
  commit: 83d9852ad97c6b27d4b24508f7cfe7ff5dd04d0d
  patterns:
    - README.md
21 changes: 21 additions & 0 deletions ansible/roles/ai_setup_instructlab/tasks/10-nvidia-customizations.yml
@@ -0,0 +1,21 @@
---

- name: Set CUDA related vars for all users
  ansible.builtin.blockinfile:
    path: /etc/environment
    block: |
      CUDA_HOME={{ setup_instructlab_cuda_home }}
      LD_LIBRARY_PATH={{ setup_instructlab_cuda_lib_path }}
    marker: "# {mark} ANSIBLE MANAGED BLOCK"
    create: true

- name: Set CUDA related vars in .bashrc
  ansible.builtin.blockinfile:
    path: "/home/{{ setup_instructlab_user }}/.bashrc"
    block: |
      export CUDA_HOME={{ setup_instructlab_cuda_home }}
      export LD_LIBRARY_PATH={{ setup_instructlab_cuda_lib_path }}:/usr/lib64
      export PATH=$PATH:/usr/local/cuda/bin
    marker: "# {mark} ANSIBLE MANAGED BLOCK"
    create: true
  become_user: "{{ setup_instructlab_user | default('instruct') }}"
16 changes: 16 additions & 0 deletions ansible/roles/ai_setup_instructlab/tasks/20-developer-customizations.yml
@@ -0,0 +1,16 @@
---

- name: Setup AI developer packages
  ansible.builtin.dnf:
    name: "{{ package }}"
    state: present
  loop: "{{ setup_instructlab_developer_packages }}"
  loop_control:
    loop_var: package

- name: "Set system default Python version to {{ setup_instructlab_python_version }}"
  community.general.alternatives:
    name: python
    link: /usr/bin/python3
    path: "/usr/bin/python{{ setup_instructlab_python_version | default('3.11') }}"

26 changes: 26 additions & 0 deletions ansible/roles/ai_setup_instructlab/tasks/30-instruct-repos.yml
@@ -0,0 +1,26 @@
---

- name: Setup the user's InstructLab environment
  block:

    - name: Clone InstructLab repository
      ansible.builtin.git:
        repo: "{{ setup_instructlab_repo_url }}"
        dest: "{{ setup_instructlab_home }}"
        version: "{{ setup_instructlab_git_ref }}"
        clone: true
        update: true
      register: r_git_clone_instructlab

    - name: Clone taxonomy repository if not present
      when: not setup_instructlab_summit2024_mode | bool
      ansible.builtin.git:
        repo: "{{ setup_instructlab_taxonomy_repo_url }}"
        dest: "{{ setup_instructlab_home }}/taxonomy"
        version: "main"
        clone: true
        update: true
      register: r_git_clone_taxonomy

  become_user: "{{ setup_instructlab_user | default('instruct') }}"

59 changes: 59 additions & 0 deletions ansible/roles/ai_setup_instructlab/tasks/40-instructlab-runtime-setup.yml
@@ -0,0 +1,59 @@
---
- name: Venv and iLab Python setup
  block:

    - name: "Setup a Python {{ setup_instructlab_python_version }} virtual environment"
      ansible.builtin.command:
        cmd: >-
          python{{ setup_instructlab_python_version }} -m venv
          --upgrade-deps {{ setup_instructlab_home }}/venv
      args:
        creates: "{{ setup_instructlab_home }}/venv"

    - name: Install InstructLab package from local git repo
      ansible.builtin.command:
        cmd: >-
          {{ setup_instructlab_home }}/venv/bin/pip
          install {{ setup_instructlab_home }}

    # - name: Remove the pip cache directory
    #   ansible.builtin.file:
    #     path: "{{ setup_instructlab_install_path_base }}/.cache/pip"
    #     state: absent
    #     force: yes

    - name: Remove llama_cpp_python from pip cache
      ansible.builtin.command:
        cmd: >-
          {{ setup_instructlab_home }}/venv/bin/pip
          cache remove llama_cpp_python

    # - name: Setup llama-cpp-python with CUDA Support
    #   ansible.builtin.command:
    #     cmd: >-
    #       {{ setup_instructlab_home }}/venv/bin/pip install
    #       --force-reinstall "llama_cpp_python[server]==0.2.79"
    #       --config-settings cmake.args="-DLLAMA_CUDA=on"
    #   ignore_errors: true

    - name: Setup llama-cpp-python with CUDA support
      ansible.builtin.command:
        cmd: >-
          {{ setup_instructlab_home }}/venv/bin/pip install -v
          --force-reinstall "llama_cpp_python[server]=={{ setup_instructlab_llama_cpp_python_version }}"
          --config-settings cmake.args="-DLLAMA_CUDA=on"
      environment:
        CUDA_HOME: "{{ setup_instructlab_cuda_home }}"
        LD_LIBRARY_PATH: "{{ setup_instructlab_cuda_lib_path }}:/usr/lib64"
        PATH: "{{ ansible_env.PATH }}:/usr/local/cuda/bin"
      ignore_errors: true

    # - name: Setup instructlab[cuda]
    #   ansible.builtin.command:
    #     cmd: >-
    #       {{ setup_instructlab_home }}/venv/bin/pip
    #       install 'instructlab[cuda]'
    #   ignore_errors: true

  become: true
  become_user: "{{ setup_instructlab_user }}"
26 changes: 26 additions & 0 deletions ansible/roles/ai_setup_instructlab/tasks/main.yml
@@ -0,0 +1,26 @@
---

- name: Setup Nvidia customizations
  ansible.builtin.include_tasks:
    file: 10-nvidia-customizations.yml
  tags:
    - setup-instructlab-nvidia

- name: Setup Developer customizations
  ansible.builtin.include_tasks:
    file: 20-developer-customizations.yml
  tags:
    - setup-instructlab-developer

- name: Setup InstructLab Repos
  ansible.builtin.include_tasks:
    file: 30-instruct-repos.yml
  tags:
    - setup-instructlab-repos

- name: Setup InstructLab Runtime env
  ansible.builtin.include_tasks:
    file: 40-instructlab-runtime-setup.yml
  tags:
    - setup-instructlab-runtime
