From 0bf820508c56eee6ca465c6924d1720040e224cc Mon Sep 17 00:00:00 2001
From: ricardotejedorsanz <128725076+ricardotejedorsanz@users.noreply.github.com>
Date: Fri, 5 Sep 2025 17:10:32 +0100
Subject: [PATCH] Add compare categories command

---
 .gitignore                                    |   3 +
 dev/lib/product_taxonomy.rb                   |   1 +
 dev/lib/product_taxonomy/cli.rb               |   6 +
 .../commands/compare_categories_command.rb    | 176 ++++++++++++++++++
 4 files changed, 186 insertions(+)
 create mode 100644 dev/lib/product_taxonomy/commands/compare_categories_command.rb

diff --git a/.gitignore b/.gitignore
index a1297f8c1..36f14e59f 100644
--- a/.gitignore
+++ b/.gitignore
@@ -3,3 +3,6 @@
 /docs/_data/unstable
 /docs/.jekyll-metadata
 node_modules
+
+# Ignore exports folder
+/exports
diff --git a/dev/lib/product_taxonomy.rb b/dev/lib/product_taxonomy.rb
index 8244c0a77..3d748e264 100644
--- a/dev/lib/product_taxonomy.rb
+++ b/dev/lib/product_taxonomy.rb
@@ -60,3 +60,4 @@ def data_path = DATA_PATH
 require_relative "product_taxonomy/commands/add_attribute_command"
 require_relative "product_taxonomy/commands/add_attributes_to_categories_command"
 require_relative "product_taxonomy/commands/add_value_command"
+require_relative "product_taxonomy/commands/compare_categories_command"
diff --git a/dev/lib/product_taxonomy/cli.rb b/dev/lib/product_taxonomy/cli.rb
index 2168709e3..48c68cac8 100644
--- a/dev/lib/product_taxonomy/cli.rb
+++ b/dev/lib/product_taxonomy/cli.rb
@@ -95,5 +95,11 @@ def add_attributes_to_categories(attribute_friendly_ids, category_ids)
     def add_value(name, attribute_friendly_id)
       AddValueCommand.new(options.merge(name:, attribute_friendly_id:)).run
     end
+
+    desc "compare_categories VERSION_FOLDER", "Compare category changes between full_names.yml and categories.txt"
+    option :output_dir, type: :string, default: "exports", desc: "Output directory for CSV file"
+    def compare_categories(version_folder)
+      CompareCategoriesCommand.new(options).run(version_folder)
+    end
   end
 end
diff --git a/dev/lib/product_taxonomy/commands/compare_categories_command.rb b/dev/lib/product_taxonomy/commands/compare_categories_command.rb
new file mode 100644
index 000000000..38b0f43ed
--- /dev/null
+++ b/dev/lib/product_taxonomy/commands/compare_categories_command.rb
@@ -0,0 +1,176 @@
+# frozen_string_literal: true
+
+require "csv"
+require "yaml"
+require "fileutils"
+
+module ProductTaxonomy
+  # Compares the category names stored for one Shopify integration version
+  # (integrations/shopify/<version_folder>/full_names.yml) with the names in
+  # dist/en/categories.txt and writes every difference to a CSV report.
+  #
+  # Each category ID is classified as one of:
+  # - rename:   present in both files, but with a different full name
+  # - archived: present only in full_names.yml
+  # - addition: present only in categories.txt
+  class CompareCategoriesCommand < Command
+    # Prefix that precedes the category ID on each categories.txt line.
+    CATEGORY_GID_PREFIX = "gid://shopify/TaxonomyCategory/"
+
+    # Columns of the CSV report; also the keys of every change hash.
+    CSV_COLUMNS = %i[type id old_name new_name].freeze
+
+    # Summary headings per change type. Spelled out rather than derived by
+    # appending "s", which would turn :archived into "Archiveds".
+    SUMMARY_LABELS = {
+      rename: "Renames",
+      archived: "Archived",
+      addition: "Additions"
+    }.freeze
+
+    # Compares the two files, writes the CSV report and logs a summary.
+    #
+    # @param version_folder [String] folder under integrations/shopify that
+    #   holds the full_names.yml to compare against
+    # @raise [ArgumentError] if full_names.yml or categories.txt is missing
+    def execute(version_folder)
+      validate_version_folder!(version_folder)
+
+      logger.info("Loading full_names from version: #{version_folder}")
+      full_names = load_full_names(version_folder)
+      logger.info("Loaded #{full_names.size} categories from full_names.yml")
+
+      logger.info("Loading categories from dist/en/categories.txt")
+      categories = load_categories
+      logger.info("Loaded #{categories.size} categories from categories.txt")
+
+      logger.info("Comparing categories...")
+      changes = compare_categories(full_names, categories)
+
+      # NOTE(review): a relative output_dir is resolved against
+      # ProductTaxonomy.data_path, while the .gitignore entry added alongside
+      # this command is "/exports" -- confirm both point at the same directory.
+      output_dir = File.expand_path(options[:output_dir] || "exports", ProductTaxonomy.data_path)
+      FileUtils.mkdir_p(output_dir)
+
+      output_path = write_csv_report(changes, version_folder, output_dir)
+
+      log_summary(changes, output_path)
+    end
+
+    private
+
+    # Logs the total number of changes, a per-type breakdown and, when there
+    # is at least one change, the path of the CSV report.
+    def log_summary(changes, output_path)
+      logger.info("")
+      logger.info("Comparison complete!")
+      logger.info("Total changes detected: #{changes.size}")
+
+      if changes.empty?
+        logger.info("No changes detected between the two files.")
+        return
+      end
+
+      changes.group_by { |change| change[:type] }.each do |change_type, changes_of_type|
+        logger.info(" #{SUMMARY_LABELS.fetch(change_type)}: #{changes_of_type.size}")
+      end
+
+      logger.info("")
+      logger.info("Detailed report saved to: #{output_path}")
+    end
+
+    # Absolute path of full_names.yml for the given version folder.
+    def full_names_path(version_folder)
+      File.expand_path(
+        "integrations/shopify/#{version_folder}/full_names.yml",
+        ProductTaxonomy.data_path
+      )
+    end
+
+    # @raise [ArgumentError] if the version folder has no full_names.yml
+    def validate_version_folder!(version_folder)
+      return if File.exist?(full_names_path(version_folder))
+
+      raise ArgumentError, "full_names.yml not found in #{version_folder}"
+    end
+
+    # Loads full_names.yml as a Hash of category ID => full name.
+    def load_full_names(version_folder)
+      # An empty YAML file loads as nil; treat that as "no categories"
+      # instead of failing with NoMethodError.
+      entries = YAML.safe_load_file(full_names_path(version_folder)) || []
+
+      entries.to_h { |entry| [entry["id"], entry["full_name"]] }
+    end
+
+    # Parses dist/en/categories.txt into a Hash of category ID => full name.
+    #
+    # Expected line format: gid://shopify/TaxonomyCategory/{id} : {full_name}
+    # Blank lines, "#" comments and lines in any other format are skipped.
+    #
+    # @raise [ArgumentError] if categories.txt does not exist
+    def load_categories
+      categories_path = File.expand_path("../dist/en/categories.txt", ProductTaxonomy.data_path)
+
+      unless File.exist?(categories_path)
+        raise ArgumentError, "categories.txt not found in dist/en/"
+      end
+
+      File.foreach(categories_path).each_with_object({}) do |raw_line, categories|
+        line = raw_line.strip
+        next if line.empty? || line.start_with?("#")
+
+        gid_part, full_name = line.split(" : ", 2)
+        next unless full_name && gid_part.start_with?(CATEGORY_GID_PREFIX)
+
+        # delete_prefix strips only the leading GID prefix, so the same text
+        # appearing later in the ID is left intact.
+        category_id = gid_part.delete_prefix(CATEGORY_GID_PREFIX).strip
+        categories[category_id] = full_name.strip
+      end
+    end
+
+    # Builds the list of changes between the two ID => name hashes.
+    #
+    # @param full_names [Hash] ID => name from full_names.yml (the old side)
+    # @param categories [Hash] ID => name from categories.txt (the new side)
+    # @return [Array<Hash>] one hash per change with the keys in CSV_COLUMNS,
+    #   ordered by category ID
+    def compare_categories(full_names, categories)
+      (full_names.keys | categories.keys).sort.filter_map do |category_id|
+        in_full_names = full_names.key?(category_id)
+        in_categories = categories.key?(category_id)
+        old_name = full_names[category_id]
+        new_name = categories[category_id]
+
+        if in_full_names && in_categories
+          # Same ID on both sides: it is a change only when the name differs.
+          next if old_name == new_name
+
+          { type: :rename, id: category_id, old_name:, new_name: }
+        elsif in_full_names
+          { type: :archived, id: category_id, old_name:, new_name: "" }
+        else
+          { type: :addition, id: category_id, old_name: "", new_name: }
+        end
+      end
+    end
+
+    # Writes the changes to a timestamped CSV file inside output_dir.
+    #
+    # @return [String] path of the file that was written
+    def write_csv_report(changes, version_folder, output_dir)
+      timestamp = Time.now.strftime("%Y%m%d_%H%M%S")
+      filename = "category_changes_#{version_folder}_#{timestamp}.csv"
+      output_path = File.join(output_dir, filename)
+
+      CSV.open(output_path, "w", encoding: "utf-8") do |csv|
+        csv << CSV_COLUMNS.map(&:to_s)
+        changes.each { |change| csv << change.values_at(*CSV_COLUMNS) }
+      end
+
+      output_path
+    end
+  end
+end