From 17d37d8c78d0c9c545aac3ed4d56a599a352d33b Mon Sep 17 00:00:00 2001 From: Daniel Gross Date: Mon, 18 Nov 2024 15:31:02 -0500 Subject: [PATCH 1/2] Add `Category#load_from_source` --- dev/lib/product_taxonomy/models/category.rb | 239 ++++++++++++- dev/test/benchmarks.rb | 16 + dev/test/models/category_test.rb | 370 ++++++++++++++++++++ 3 files changed, 621 insertions(+), 4 deletions(-) create mode 100644 dev/test/models/category_test.rb diff --git a/dev/lib/product_taxonomy/models/category.rb b/dev/lib/product_taxonomy/models/category.rb index 74eff2ac2..a5c430c93 100644 --- a/dev/lib/product_taxonomy/models/category.rb +++ b/dev/lib/product_taxonomy/models/category.rb @@ -2,15 +2,246 @@ module ProductTaxonomy class Category - attr_reader :id, :name, :children, :attributes - attr_accessor :parent + include ActiveModel::Validations - def initialize(id:, name:, attributes: [], parent: nil) + class << self + # Load categories from source data. + # + # @param source_data [Array] The source data to load categories from. + # @param attributes [Hash] The attributes of the categories, keyed by friendly ID. + # @return Array The root categories (verticals) of the category tree. + def load_from_source(source_data:, attributes:) + model_index = ModelIndex.new(self) + + raise ArgumentError, "source_data must be an array" unless source_data.is_a?(Array) + + # First pass: Create all nodes and add to model index + source_data.each do |item| + node = Category.new( + id: item["id"], + name: item["name"], + attributes: item["attributes"]&.map { attributes[_1] || _1 }, + uniqueness_context: model_index, + ) + model_index.add(node) + end + + # Second pass: Build relationships + nodes_by_id = model_index.hashed_by(:id) + source_data.each do |item| + parent = nodes_by_id[item["id"]] + add_children(type: "children", nodes: nodes_by_id, item:, parent:) + add_children(type: "secondary_children", nodes: nodes_by_id, item:, parent:) + end + + # Third pass: Validate all nodes and collect root nodes + nodes_by_id.values.each_with_object([]) do |node, root_nodes| + node.validate! + root_nodes << node if node.root? + end + end + + private + + def add_children(type:, nodes:, item:, parent:) + item[type]&.each do |child_id| + child = nodes[child_id] || child_id + + case type + when "children" then parent.add_child(child) + when "secondary_children" then parent.add_secondary_child(child) + end + end + end + end + + validates :id, format: { with: /\A[a-z]{2}(-\d+)*\z/ } + validates :name, presence: true + validate :id_matches_depth + validate :id_starts_with_parent_id, unless: :root? + validate :attributes_found? + validate :children_found? + validate :secondary_children_found? + validates_with ProductTaxonomy::ModelIndex::UniquenessValidator, attributes: [:id] + + attr_reader :id, :name, :children, :secondary_children, :attributes, :uniqueness_context + attr_accessor :parent, :secondary_parents + + # @param id [String] The ID of the category. + # @param name [String] The name of the category. + # @param attributes [Array] The attributes of the category. + # @param uniqueness_context [ModelIndex] The uniqueness context for the category. + # @param parent [Category] The parent category of the category. + def initialize(id:, name:, attributes: [], uniqueness_context: nil, parent: nil) @id = id @name = name @children = [] + @secondary_children = [] @attributes = attributes - @parent = nil + @parent = parent + @secondary_parents = [] + @uniqueness_context = uniqueness_context + end + + # + # Manipulation + # + + # Add a child to the category + # + # @param [Category|String] child node, or the friendly ID if the node was not found. + def add_child(child) + @children << child + + return unless child.is_a?(Category) + + child.parent = self + end + + # Add a secondary child to the category + # + # @param [Category|String] child node, or the friendly ID if the node was not found. + def add_secondary_child(child) + @secondary_children << child + + return unless child.is_a?(Category) + + child.secondary_parents << self + end + + # + # Information + # + def inspect + "#<#{self.class.name} id=#{id} name=#{name}>" + end + + # Whether the category is the root category + # + # @return [Boolean] + def root? + parent.nil? + end + + # Whether the category is a leaf category + # + # @return [Boolean] + def leaf? + children.empty? + end + + # The level of the category + # + # @return [Integer] + def level + ancestors.size + end + + # The root category in this category's tree + # + # @return [Category] + def root + ancestors.last || self + end + + # The ancestors of the category + # + # @return [Array] + def ancestors + if root? + [] + else + [parent] + parent.ancestors + end + end + + # The full name of the category + # + # @return [String] + def full_name + return name if root? + + parent.full_name + " > " + name + end + + # Whether the category is a descendant of another category + # + # @param [Category] category + # @return [Boolean] + def descendant_of?(category) + ancestors.include?(category) + end + + # Iterate over the category and all its descendants + # + # @yield [Category] + def traverse(&block) + yield self + children.each { _1.traverse(&block) } + end + + private + + # + # Validation + # + def id_matches_depth + parts_count = id.split("-").size + + return if parts_count == level + 1 + + if level.zero? + # In this case, the most likely mistake was not adding the category to the parent's `children` field. + errors.add(:base, :orphan, message: "\"#{id}\" does not appear in the children of any category") + else + errors.add( + :id, + :depth_mismatch, + message: "\"#{id}\" has #{parts_count} #{"part".pluralize(parts_count)} but is at level #{level + 1}", + ) + end + end + + def id_starts_with_parent_id + return if id.start_with?(parent.id) + + errors.add(:id, :prefix_mismatch, message: "\"#{id}\" must be prefixed by \"#{parent.id}\"") + end + + def attributes_found? + attributes&.each do |attribute| + next if attribute.is_a?(Attribute) + + errors.add( + :attributes, + :not_found, + message: "could not be resolved for friendly ID \"#{attribute}\"", + ) + end + end + + def children_found? + children&.each do |child| + next if child.is_a?(Category) + + errors.add( + :children, + :not_found, + message: "could not be resolved for friendly ID \"#{child}\"", + ) + end + end + + def secondary_children_found? + secondary_children&.each do |child| + next if child.is_a?(Category) + + errors.add( + :secondary_children, + :not_found, + message: "could not be resolved for friendly ID \"#{child}\"", + ) + end end end end diff --git a/dev/test/benchmarks.rb b/dev/test/benchmarks.rb index 3b98619bb..b81449454 100644 --- a/dev/test/benchmarks.rb +++ b/dev/test/benchmarks.rb @@ -19,5 +19,21 @@ module ProductTaxonomy values: values_model_index.hashed_by(:friendly_id), ) end + + x.report("Load values, attributes, and categories") do + values_model_index = Value.load_from_source(source_data: YAML.safe_load_file("../data/values.yml")) + + attributes_model_index = Attribute.load_from_source( + source_data: YAML.safe_load_file("../data/attributes.yml"), + values: values_model_index.hashed_by(:friendly_id), + ) + categories_source_data = Dir.glob("../data/categories/*.yml").each_with_object([]) do |file, array| + array.concat(YAML.safe_load_file(file)) + end + Category.load_from_source( + source_data: categories_source_data, + attributes: attributes_model_index.hashed_by(:friendly_id), + ) + end end end diff --git a/dev/test/models/category_test.rb b/dev/test/models/category_test.rb new file mode 100644 index 000000000..19c99b6d8 --- /dev/null +++ b/dev/test/models/category_test.rb @@ -0,0 +1,370 @@ +# frozen_string_literal: true + +require "test_helper" + +module ProductTaxonomy + class CategoryTest < ActiveSupport::TestCase + setup do + @root = Category.new(id: "aa", name: "Root") + @child = Category.new(id: "aa-1", name: "Child") + @root.add_child(@child) + @grandchild = Category.new(id: "aa-1-1", name: "Grandchild") + @child.add_child(@grandchild) + end + + test "add_child sets parent-child relationship" do + root = Category.new(id: "aa", name: "Root") + child = Category.new(id: "aa-1", name: "Child") + root.add_child(child) + assert_includes root.children, child + assert_equal root, child.parent + end + + test "add_secondary_child sets secondary relationships" do + root = Category.new(id: "aa", name: "Root") + root2 = Category.new(id: "bb", name: "Root2") + child = Category.new(id: "aa-1", name: "Child") + root.add_secondary_child(child) + root2.add_secondary_child(child) + assert_includes root.secondary_children, child + assert_includes root2.secondary_children, child + assert_equal [root, root2], child.secondary_parents + end + + test "root? is true for root node" do + assert @root.root? + end + + test "root? is false for child node" do + refute @child.root? + end + + test "leaf? is true for node without children" do + assert @grandchild.leaf? + end + + test "leaf? is true for node with only secondary children" do + @grandchild.add_secondary_child(@child) + assert @grandchild.leaf? + end + + test "leaf? is false for node with children" do + refute @root.leaf? + end + + test "level is 0 for root node" do + assert_equal 0, @root.level + end + + test "level is 1 for child node" do + assert_equal 1, @child.level + end + + test "level is 2 for grandchild node" do + assert_equal 2, @grandchild.level + end + + test "ancestors is empty for root node" do + assert_empty @root.ancestors + end + + test "ancestors contains [root] for child node" do + assert_equal [@root], @child.ancestors + end + + test "ancestors contains [parent, root] for grandchild node" do + assert_equal [@child, @root], @grandchild.ancestors + end + + test "root returns self for root node" do + assert_equal @root, @root.root + end + + test "root returns root node for child node" do + assert_equal @root, @child.root + end + + test "root returns root node for grandchild node" do + assert_equal @root, @grandchild.root + end + + test "full_name returns name for root node" do + assert_equal "Root", @root.full_name + end + + test "full_name returns parent > child for child node" do + assert_equal "Root > Child", @child.full_name + end + + test "full_name returns full path for grandchild node" do + assert_equal "Root > Child > Grandchild", @grandchild.full_name + end + + test "descendant_of? is true for child of root" do + assert @child.descendant_of?(@root) + end + + test "descendant_of? is true for grandchild of root" do + assert @grandchild.descendant_of?(@root) + end + + test "descendant_of? is true for child of parent" do + assert @grandchild.descendant_of?(@child) + end + + test "descendant_of? is false for parent of child" do + refute @child.descendant_of?(@grandchild) + end + + test "descendant_of? is false for root of child" do + refute @root.descendant_of?(@child) + end + + test "raises validation error if id is invalid" do + category = Category.new(id: "foo", name: "Test") + error = assert_raises(ActiveModel::ValidationError) { category.validate! } + expected_errors = { + id: [{ error: :invalid, value: "foo" }], + } + assert_equal expected_errors, error.model.errors.details + end + + test "raises validation error if name is blank" do + category = Category.new(id: "aa", name: "") + error = assert_raises(ActiveModel::ValidationError) { category.validate! } + expected_errors = { + name: [{ error: :blank }], + } + assert_equal expected_errors, error.model.errors.details + end + + test "raises validation error if id depth doesn't match hierarchy level" do + # A root category should have format "aa" + parent = Category.new(id: "aa", name: "Root") + category = Category.new(id: "aa-1-1", name: "Test", parent:) + error = assert_raises(ActiveModel::ValidationError) { category.validate! } + expected_errors = { + id: [{ error: :depth_mismatch }], + } + assert_equal expected_errors, error.model.errors.details + end + + test "raises validation error if child id doesn't start with parent id" do + root = Category.new(id: "aa", name: "Root") + child = Category.new(id: "bb-1", name: "Child") + root.add_child(child) + + error = assert_raises(ActiveModel::ValidationError) { child.validate! } + expected_errors = { + id: [{ error: :prefix_mismatch }], + } + assert_equal expected_errors, error.model.errors.details + end + + test "raises validation error for duplicate id" do + uniqueness_context = ModelIndex.new(Category) + root = Category.new(id: "aa", name: "Root", uniqueness_context:) + uniqueness_context.add(root) + child = Category.new(id: "aa", name: "Child", uniqueness_context:) + uniqueness_context.add(child) + error = assert_raises(ActiveModel::ValidationError) { child.validate! } + expected_errors = { + id: [{ error: :taken }], + } + assert_equal expected_errors, error.model.errors.details + end + + test "validates correct id format examples" do + assert_nothing_raised do + Category.new(id: "aa", name: "Root").validate! + root = Category.new(id: "bb", name: "Root") + child = Category.new(id: "bb-1", name: "Child") + root.add_child(child) + child.validate! + grandchild = Category.new(id: "bb-1-1", name: "Grandchild") + child.add_child(grandchild) + grandchild.validate! + end + end + + test "load_from_source loads categories from deserialized YAML" do + value = Value.new(id: 1, name: "Black", friendly_id: "black", handle: "black") + attribute = Attribute.new( + id: 1, + name: "Color", + friendly_id: "color", + handle: "color", + description: "Defines the primary color or pattern, such as blue or striped", + values: [value], + ) + yaml_content = <<~YAML + --- + - id: aa + name: Apparel & Accessories + children: + - aa-1 + attributes: + - color + - id: aa-1 + name: Clothing + children: [] + attributes: + - color + YAML + + categories = Category.load_from_source( + source_data: YAML.safe_load(yaml_content), + attributes: { "color" => attribute }, + ) + + assert_equal 1, categories.size + assert_equal "aa", categories.first.id + assert_equal "Apparel & Accessories", categories.first.name + assert_equal [attribute], categories.first.attributes + assert_equal 1, categories.first.children.size + assert_equal "aa-1", categories.first.children.first.id + assert_equal [attribute], categories.first.children.first.attributes + end + + test "load_from_source raises validation error if attribute is not found" do + yaml_content = <<~YAML + --- + - id: aa + name: Apparel & Accessories + children: [] + attributes: + - foo + YAML + + error = assert_raises(ActiveModel::ValidationError) do + Category.load_from_source(source_data: YAML.safe_load(yaml_content), attributes: {}) + end + expected_errors = { + attributes: [{ error: :not_found }], + } + assert_equal expected_errors, error.model.errors.details + end + + test "load_from_source raises validation error if child is not found" do + yaml_content = <<~YAML + --- + - id: aa + name: Apparel & Accessories + children: + - aa-1 + attributes: [] + YAML + + error = assert_raises(ActiveModel::ValidationError) do + Category.load_from_source(source_data: YAML.safe_load(yaml_content), attributes: {}) + end + expected_errors = { + children: [{ error: :not_found }], + } + assert_equal expected_errors, error.model.errors.details + end + + test "load_from_source raises validation error if secondary child is not found" do + yaml_content = <<~YAML + --- + - id: aa + name: Apparel & Accessories + secondary_children: + - aa-1 + attributes: [] + YAML + + error = assert_raises(ActiveModel::ValidationError) do + Category.load_from_source(source_data: YAML.safe_load(yaml_content), attributes: {}) + end + expected_errors = { + secondary_children: [{ error: :not_found }], + } + assert_equal expected_errors, error.model.errors.details + end + + test "load_from_source raises validation error if category is orphaned" do + yaml_content = <<~YAML + --- + - id: aa + name: Apparel & Accessories + attributes: [] + children: [] # aa-1 is missing + - id: aa-1 + name: Clothing + attributes: [] + YAML + + error = assert_raises(ActiveModel::ValidationError) do + Category.load_from_source(source_data: YAML.safe_load(yaml_content), attributes: {}) + end + expected_errors = { + base: [{ error: :orphan }], + } + assert_equal expected_errors, error.model.errors.details + end + + test "load_from_source loads categories with multiple paths from deserialized YAML" do + value = Value.new(id: 1, name: "Black", friendly_id: "black", handle: "black") + attribute = Attribute.new( + id: 1, + name: "Color", + friendly_id: "color", + handle: "color", + description: "Defines the primary color or pattern, such as blue or striped", + values: [value], + ) + yaml_content = <<~YAML + --- + - id: aa + name: Apparel & Accessories + children: + - aa-1 + attributes: + - color + - id: aa-1 + name: Clothing + children: [] + secondary_children: + - bi-19-10 + - id: bi + name: Business & Industrial + children: + - bi-19 + attributes: + - color + - id: bi-19 + name: Medical + children: + - bi-19-10 + attributes: + - color + - id: bi-19-10 + name: Scrubs + children: [] + attributes: + - color + YAML + + categories = Category.load_from_source( + source_data: YAML.safe_load(yaml_content), + attributes: { "color" => attribute }, + ) + + aa_root = categories.first + aa_clothing = aa_root.children.first + bi_root = categories.second + bi_medical = bi_root.children.first + bi_scrubs = bi_medical.children.first + + assert_equal "aa", aa_root.id + assert_equal "aa-1", aa_clothing.id + assert_equal "bi", bi_root.id + assert_equal "bi-19", bi_medical.id + assert_equal "bi-19-10", bi_scrubs.id + assert_equal bi_scrubs, aa_clothing.secondary_children.first + assert_equal bi_medical, bi_scrubs.parent + assert_equal aa_clothing, bi_scrubs.secondary_parents.first + end + end +end From dfabc3d13b482418ea2f34adad096e32dea94445 Mon Sep 17 00:00:00 2001 From: Daniel Gross Date: Tue, 19 Nov 2024 09:55:28 -0500 Subject: [PATCH 2/2] Update dev/lib/product_taxonomy/models/category.rb Co-authored-by: Tyler Rowsell --- dev/lib/product_taxonomy/models/category.rb | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/dev/lib/product_taxonomy/models/category.rb b/dev/lib/product_taxonomy/models/category.rb index a5c430c93..805dceb2b 100644 --- a/dev/lib/product_taxonomy/models/category.rb +++ b/dev/lib/product_taxonomy/models/category.rb @@ -148,11 +148,9 @@ def root # # @return [Array] def ancestors - if root? - [] - else - [parent] + parent.ancestors - end + return [] if root? + + [parent] + parent.ancestors end # The full name of the category