Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat(knowledge-base): implement course material knowledge base #7690

Draft
wants to merge 18 commits into
base: master
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 4 additions & 3 deletions .circleci/config.yml
Original file line number Diff line number Diff line change
Expand Up @@ -27,10 +27,11 @@ executors:
DATABASE_URL: 'postgres://ubuntu@localhost:5432/coursemology_test'
COLLECT_COVERAGE: << parameters.collects_rails_coverage >>

- image: cimg/postgres:16.1
- image: pgvector/pgvector:pg16
environment:
POSTGRES_USER: ubuntu
POSTGRES_DB: coursemology_test
POSTGRES_PASSWORD: Testing1234

- image: cimg/redis:7.2.3

Expand Down Expand Up @@ -129,7 +130,7 @@ commands:
- run:
name: Create coursemology_keycloak db
command: |
DB_CONTAINER_ID=$(docker ps -q --filter ancestor=cimg/postgres:16.1)
DB_CONTAINER_ID=$(docker ps -q --filter ancestor=pgvector/pgvector:pg16)
docker exec $DB_CONTAINER_ID psql -c "CREATE DATABASE coursemology_keycloak OWNER ubuntu;" -U ubuntu -d postgres
docker exec $DB_CONTAINER_ID psql -c "CREATE DATABASE coursemology OWNER ubuntu;" -U ubuntu -d postgres
- run:
Expand All @@ -148,7 +149,7 @@ commands:
working_directory: authentication
command: |
touch .env
echo KC_NETWORK_MODE="container:$(docker ps -q --filter ancestor=cimg/postgres:16.1)" >> .env
echo KC_NETWORK_MODE="container:$(docker ps -q --filter ancestor=pgvector/pgvector:pg16)" >> .env
echo KC_DB="postgres" >> .env
echo KC_DB_URL="jdbc:postgresql://localhost:5432/coursemology_keycloak" >> .env
echo KC_DB_USERNAME="ubuntu" >> .env
Expand Down
7 changes: 7 additions & 0 deletions Gemfile
Original file line number Diff line number Diff line change
Expand Up @@ -214,3 +214,10 @@ gem 'rails-html-sanitizer', '>= 1.0.4'

gem 'mimemagic', '0.4.3'
gem 'ffi', '>= 1.14.2'

# Retrieval-Augmented Generation (RAG) support
gem 'pgvector'
gem 'neighbor'
gem 'langchainrb'
gem 'ruby-openai'
gem 'pdf-reader'
47 changes: 47 additions & 0 deletions Gemfile.lock
Original file line number Diff line number Diff line change
Expand Up @@ -53,6 +53,7 @@ GIT
GEM
remote: https://rubygems.org/
specs:
Ascii85 (2.0.1)
actioncable (7.2.1)
actionpack (= 7.2.1)
activesupport (= 7.2.1)
Expand Down Expand Up @@ -129,6 +130,7 @@ GEM
rails (>= 6.0)
addressable (2.8.7)
public_suffix (>= 2.0.2, < 7.0)
afm (0.2.2)
after_commit_action (1.1.0)
activerecord (>= 3.0.0)
activesupport (>= 3.0.0)
Expand All @@ -149,6 +151,7 @@ GEM
aws-sigv4 (~> 1.5)
aws-sigv4 (1.10.1)
aws-eventstream (~> 1, >= 1.0.2)
baran (0.1.12)
base64 (0.2.0)
bcrypt (3.1.20)
bigdecimal (3.1.8)
Expand Down Expand Up @@ -215,13 +218,22 @@ GEM
erubi (1.13.0)
et-orbi (1.2.11)
tzinfo
event_stream_parser (1.0.0)
excon (1.2.2)
exifr (1.4.0)
factory_bot (6.5.0)
activesupport (>= 5.0.0)
factory_bot_rails (6.4.3)
factory_bot (~> 6.4)
railties (>= 5.0.0)
faraday (2.12.0)
faraday-net_http (>= 2.0, < 3.4)
json
logger
faraday-multipart (1.0.4)
multipart-post (~> 2)
faraday-net_http (3.3.0)
net-http
ffi (1.17.0)
filename (0.1.2)
flamegraph (0.9.5)
Expand Down Expand Up @@ -250,6 +262,7 @@ GEM
raabro (~> 1.4)
globalid (1.2.1)
activesupport (>= 6.1)
hashery (2.1.2)
highline (3.0.1)
html-pipeline (2.14.3)
activesupport (>= 2)
Expand Down Expand Up @@ -295,6 +308,8 @@ GEM
activesupport (>= 5.0.0)
jmespath (1.6.2)
json (2.7.4)
json-schema (4.3.1)
addressable (>= 2.8)
jwt (2.9.3)
base64
kaminari (1.2.2)
Expand All @@ -313,6 +328,12 @@ GEM
json (~> 2.6)
jwt (~> 2.4)
rest-client (~> 2.1)
langchainrb (0.19.2)
baran (~> 0.1.9)
json-schema (~> 4)
matrix
pragmatic_segmenter (~> 0.3.0)
zeitwerk (~> 2.5)
language_server-protocol (3.17.0.3)
launchy (3.0.1)
addressable (~> 2.8)
Expand Down Expand Up @@ -355,6 +376,11 @@ GEM
mini_portile2 (2.8.8)
minitest (5.25.1)
multi_json (1.15.0)
multipart-post (2.4.1)
neighbor (0.5.0)
activerecord (>= 7)
net-http (0.4.1)
uri
net-imap (0.4.14)
date
net-protocol
Expand All @@ -377,7 +403,15 @@ GEM
parser (3.3.5.0)
ast (~> 2.4.1)
racc
pdf-reader (2.13.0)
Ascii85 (>= 1.0, < 3.0, != 2.0.0)
afm (~> 0.2.1)
hashery (~> 2.0)
ruby-rc4
ttfunk
pg (1.5.8)
pgvector (0.3.2)
pragmatic_segmenter (0.3.24)
progress (3.6.0)
psych (5.1.2)
stringio
Expand Down Expand Up @@ -527,7 +561,12 @@ GEM
rubocop (>= 1.52.0, < 2.0)
rubocop-ast (>= 1.31.1, < 2.0)
ruby-oembed (0.18.1)
ruby-openai (7.3.1)
event_stream_parser (>= 0.3.0, < 2.0.0)
faraday (>= 1)
faraday-multipart (>= 1)
ruby-progressbar (1.13.0)
ruby-rc4 (0.1.5)
ruby-vips (2.2.2)
ffi (~> 1.12)
logger
Expand Down Expand Up @@ -585,12 +624,15 @@ GEM
timeout (0.4.1)
traceroute (0.8.1)
rails (>= 3.0.0)
ttfunk (1.8.0)
bigdecimal (~> 3.1)
tzinfo (2.0.6)
concurrent-ruby (~> 1.0)
unicode-display_width (2.6.0)
uniform_notifier (1.16.0)
unread (0.14.0)
activerecord (>= 6.1)
uri (0.13.1)
useragent (0.16.10)
validates_hostname (1.0.13)
activerecord (>= 3.0)
Expand Down Expand Up @@ -655,17 +697,21 @@ DEPENDENCIES
jwt
kaminari
keycloak
langchainrb
listen
lograge
lograge-sql
lol_dba
loofah (>= 2.2.1)
mimemagic (= 0.4.3)
mini_magick
neighbor
nokogiri (>= 1.8.1)
ostruct
parallel_tests
pdf-reader
pg
pgvector
puma
rack-cors
rack-mini-profiler
Expand All @@ -686,6 +732,7 @@ DEPENDENCIES
rubocop (~> 1.67)
rubocop-rails
ruby-oembed
ruby-openai
rubyzip
rwordnet!
sanitize (>= 4.6.3)
Expand Down
41 changes: 39 additions & 2 deletions app/controllers/course/material/materials_controller.rb
Original file line number Diff line number Diff line change
Expand Up @@ -9,13 +9,16 @@
end

def update
if @material.update(material_params)
if @material.workflow_state != 'chunking' && @material.update(material_params)
# deletes material's text chunk if file has been changed and file has been chunked
delete_material_text_chunks if material_params['file'] && @material.workflow_state == 'chunked'
course_user = @material.attachment.updater.course_users.find_by(course: current_course)
user = course_user || @material.attachment.updater
render json: { id: @material.id,
name: @material.name,
description: @material.description,
updatedAt: @material.attachment.updated_at,
workflowState: @material.workflow_state,
updater: { id: user.id, name: user.name,
userUrl: url_to_user_or_course_user(current_course, user) } },
status: :ok
Expand All @@ -25,7 +28,27 @@
end

# Deletes the material unless it is currently being chunked.
#
# The workflow state is checked BEFORE calling +destroy+: evaluating
# +@material.destroy+ first would delete the record and only afterwards
# notice that chunking was in progress, answering with an error for a
# deletion that had already happened.
#
# Responds with 200 OK on success, otherwise 400 with the material's
# validation errors joined into a sentence.
def destroy
  # NOTE(review): no error is added to @material when deletion is refused
  # because of chunking, so the error sentence may be empty in that case.
  if @material.workflow_state != 'chunking' && @material.destroy
    head :ok
  else
    render json: { errors: @material.errors.full_messages.to_sentence }, status: :bad_request
  end
end

# Renders the 'jobs/submitted' partial for the material's text chunking job.
#
# If +last_text_chunking_job+ returns a job, that job is reported and no
# new chunking is started. Otherwise +@material.text_chunking!+ is called
# with the current user and the +job+ of its result is reported.
# (Presumably +text_chunking!+ enqueues the background job; confirm against
# the model.)
def create_text_chunks
  job = last_text_chunking_job || @material.text_chunking!(current_user).job
  render partial: 'jobs/submitted', locals: { job: job }
end

def destroy_text_chunks
if @material.text_chunk_references.destroy_all && @material.workflow_state == 'chunked'
@material.delete_chunks!
@material.save!

Check warning on line 51 in app/controllers/course/material/materials_controller.rb

View check run for this annotation

Codecov / codecov/patch

app/controllers/course/material/materials_controller.rb#L49-L51

Added lines #L49 - L51 were not covered by tests
head :ok
else
render json: { errors: @material.errors.full_messages.to_sentence }, status: :bad_request
Expand Down Expand Up @@ -65,4 +88,18 @@
@log_service ||=
Course::Assessment::SessionLogService.new(@assessment, current_session_id, @submission)
end

# Returns the job attached to the material's text chunking record when that
# job's status is 'submitted'; returns nil when there is no record, no job,
# or the job has any other status.
def last_text_chunking_job
  job = @material.text_chunking&.job
  job if job&.status == 'submitted'
end

# Removes every text chunk reference of the material, fires the
# +delete_chunks!+ event on it and saves the material.
#
# +destroy_all+ returns the collection of destroyed records, which is always
# truthy, so the previous +else+ branch could never run. Had it run, it would
# have rendered from inside this helper while the calling action renders its
# own response. The branch is therefore dropped.
#
# Returns the result of +@material.save+.
def delete_material_text_chunks
  @material.text_chunk_references.destroy_all
  @material.delete_chunks!
  @material.save
end
end
20 changes: 20 additions & 0 deletions app/jobs/course/material/text_chunk_job.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
# frozen_string_literal: true
# Job that builds the text chunks of a course material on the :default queue.
# Progress is tracked through TrackableJob (presumably it invokes
# +perform_tracked+; confirm against the concern).
class Course::Material::TextChunkJob < ApplicationJob
  include TrackableJob
  queue_as :default

  protected

  # Marks the material as chunking, builds its text chunks on behalf of
  # +current_user+, then marks chunking as finished. Each state change is
  # saved immediately.
  #
  # On any StandardError the chunking is cancelled, the material is saved,
  # and the same error is raised again so the job itself is recorded as failed.
  def perform_tracked(material, current_user)
    fire_and_save!(material, :start_chunking!)
    material.build_text_chunks(current_user)
    fire_and_save!(material, :finish_chunking!)
  rescue StandardError => error
    fire_and_save!(material, :cancel_chunking!)
    # re-raise error to make the job have an error
    raise error
  end

  private

  # Calls the given event method on the material and persists the result.
  def fire_and_save!(material, event)
    material.public_send(event)
    material.save!
  end
end
14 changes: 13 additions & 1 deletion app/models/components/course/materials_ability_component.rb
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,8 @@ def define_permissions
allow_upload_materials
allow_staff_read_materials if course_user.staff?
allow_teaching_staff_manage_materials if course_user.teaching_staff?
disallow_text_chunking if course_user.teaching_staff?
manage_text_chunking if course_user.manager_or_owner?
end

disallow_superusers_change_root_and_linked_folders
Expand Down Expand Up @@ -45,6 +47,16 @@ def allow_upload_materials
can :manage, Course::Material, creator: user
end

# Grants the :create_text_chunks and :destroy_text_chunks actions on
# Course::Material, restricted by the conditions from +material_course_hash+.
def manage_text_chunking
  %i[create_text_chunks destroy_text_chunks].each do |action|
    can action, Course::Material, material_course_hash
  end
end

# Revokes the :create_text_chunks and :destroy_text_chunks actions on
# Course::Material, restricted by the conditions from +material_course_hash+.
def disallow_text_chunking
  %i[create_text_chunks destroy_text_chunks].each do |action|
    cannot action, Course::Material, material_course_hash
  end
end

def allow_staff_read_materials
can :read, Course::Material, material_course_hash
can [:read, :download], Course::Material::Folder, { course_id: course.id }
Expand Down Expand Up @@ -82,7 +94,7 @@ def concrete_folder_hash
def opened_material_hashes
max_start_at = Time.zone.now
# Extend start_at time with self directed time from course settings.
max_start_at += (course.advance_start_at_duration || 0) if course
max_start_at += course.advance_start_at_duration || 0 if course

# Add materials with parent assessments that open early due to personalized timeline
# Dealing with personal times is too complicated to represent as a hash of conditions
Expand Down
Loading