From e53b39e428a4d26efdeb1b19d00198f78a6950d7 Mon Sep 17 00:00:00 2001 From: Cinder Date: Sat, 27 Jul 2024 09:19:49 -0700 Subject: [PATCH] [Blacklist] Rewrite blacklist normalization (#687) This allows for both the optional (~) tags in the blacklist, and for the comment syntax. --- app/jobs/tag_batch_job.rb | 2 +- app/models/tag_alias.rb | 69 +++++++++++++++++++++++++++++---------- app/models/user.rb | 2 +- 3 files changed, 53 insertions(+), 20 deletions(-) diff --git a/app/jobs/tag_batch_job.rb b/app/jobs/tag_batch_job.rb index 30ece0d48..39e39f955 100644 --- a/app/jobs/tag_batch_job.rb +++ b/app/jobs/tag_batch_job.rb @@ -36,7 +36,7 @@ class TagBatchJob < ApplicationJob def migrate_blacklists(from, to) User.without_timeout do User.where_ilike(:blacklisted_tags, "*#{from}*").find_each(batch_size: 50) do |user| - fixed_blacklist = TagAlias.to_aliased_query(user.blacklisted_tags, overrides: { from => to }) + fixed_blacklist = TagAlias.to_aliased_query(user.blacklisted_tags, overrides: { from => to }, comments: true) user.update_column(:blacklisted_tags, fixed_blacklist) end end diff --git a/app/models/tag_alias.rb b/app/models/tag_alias.rb index f4b112099..9758eebf8 100644 --- a/app/models/tag_alias.rb +++ b/app/models/tag_alias.rb @@ -90,32 +90,65 @@ class TagAlias < TagRelationship TagAlias.to_aliased_with_originals(names).values end - def self.to_aliased_query(query, overrides: nil) + def self.to_aliased_query(query, overrides: nil, comments: false) # Remove tag types (newline syntax) query.gsub!(/(^| )(-)?(#{TagCategory::MAPPING.keys.sort_by { |x| -x.size }.join('|')}):([\S])/i, '\1\2\4') # Remove tag types (comma syntax) query.gsub!(/, (-)?(#{TagCategory::MAPPING.keys.sort_by { |x| -x.size }.join('|')}):([\S])/i, ', \1\3') + lines = query.downcase.split("\n") - collected_tags = [] + processed = [] + lookup = [] + lines.each do |line| - tags = line.split(" ").reject(&:blank?).map do |x| - negated = x[0] == '-' - [negated ? 
x[1..-1] : x, negated] + content = { tags: [] } + if line.strip.empty? + processed << content + next end - tags.each do |t| - collected_tags << t[0] + + # Remove comments + comment = line.match(/(?: |^)#(.*)/) + unless comment.nil? + content[:comment] = comment[1].strip + line = line.delete_suffix("##{comment[1]}") end + + # Process tags + line.split.compact_blank.map do |tag| + data = { + opt: tag.match(/^-?~/), + neg: tag.match(/^~?-/), + tag: tag.gsub(/^[-~]{1,}/, ""), + } + + # ex. only - or ~ surrounded by spaces + next if data[:tag].empty? + + content[:tags] << data + lookup << data[:tag] + end + + processed << content end - aliased = to_aliased_with_originals(collected_tags) - aliased.merge!(overrides) if overrides - lines = lines.map do |line| - tags = line.split(" ").reject(&:blank?).reject {|t| t == '-'}.map do |x| - negated = x[0] == '-' - [negated ? x[1..-1] : x, negated] + + # Look up the aliases + aliases = to_aliased_with_originals(lookup.uniq) + aliases.merge!(overrides) if overrides + + # Rebuild the blacklist text + output = processed.map do |line| + output_line = line[:tags].map do |data| + (data[:opt] ? "~" : "") + (data[:neg] ? "-" : "") + (aliases[data[:tag]] || data[:tag]) end - tags.map { |t| "#{t[1] ? '-' : ''}#{aliased[t[0]]}" }.join(" ") + output_line << "# #{line[:comment]}" if comments && !line[:comment].nil? + + output_line.uniq.join(" ") end - lines.uniq.join("\n") + + # TODO: This causes every empty line except for the very first one to get stripped. + # At the end of the day, it's not a huge deal. 
+ output.uniq.join("\n") end def process_undo!(update_topic: true) @@ -137,7 +170,7 @@ class TagAlias < TagRelationship def update_posts_locked_tags_undo Post.without_timeout do Post.where_ilike(:locked_tags, "*#{consequent_name}*").find_each(batch_size: 50) do |post| - fixed_tags = TagAlias.to_aliased_query(post.locked_tags, overrides: {consequent_name => antecedent_name}) + fixed_tags = TagAlias.to_aliased_query(post.locked_tags, overrides: { consequent_name => antecedent_name }) post.update_column(:locked_tags, fixed_tags) end end @@ -146,7 +179,7 @@ class TagAlias < TagRelationship def update_blacklists_undo User.without_timeout do User.where_ilike(:blacklisted_tags, "*#{consequent_name}*").find_each(batch_size: 50) do |user| - fixed_blacklist = TagAlias.to_aliased_query(user.blacklisted_tags, overrides: {consequent_name => antecedent_name}) + fixed_blacklist = TagAlias.to_aliased_query(user.blacklisted_tags, overrides: { consequent_name => antecedent_name }, comments: true) user.update_column(:blacklisted_tags, fixed_blacklist) end end @@ -260,7 +293,7 @@ class TagAlias < TagRelationship def update_blacklists User.without_timeout do User.where_ilike(:blacklisted_tags, "*#{antecedent_name}*").find_each(batch_size: 50) do |user| - fixed_blacklist = TagAlias.to_aliased_query(user.blacklisted_tags) + fixed_blacklist = TagAlias.to_aliased_query(user.blacklisted_tags, comments: true) user.update_column(:blacklisted_tags, fixed_blacklist) end end diff --git a/app/models/user.rb b/app/models/user.rb index 6cf13feb9..ba9338c61 100644 --- a/app/models/user.rb +++ b/app/models/user.rb @@ -371,7 +371,7 @@ class User < ApplicationRecord module BlacklistMethods def normalize_blacklisted_tags - self.blacklisted_tags = TagAlias.to_aliased_query(blacklisted_tags.downcase) if blacklisted_tags.present? + self.blacklisted_tags = TagAlias.to_aliased_query(blacklisted_tags, comments: true) if blacklisted_tags.present? end def is_blacklisting_user?(user)