From 762dc3da24db8942d86309a6b873ab7bff2717e7 Mon Sep 17 00:00:00 2001 From: Albert Yi Date: Mon, 6 Aug 2018 17:39:25 -0700 Subject: [PATCH] Refactor sources --- app/controllers/sources_controller.rb | 10 +- app/controllers/uploads_controller.rb | 4 +- app/logical/downloads/file.rb | 127 ++-- .../rewrite_strategies/art_station.rb | 33 - .../downloads/rewrite_strategies/base.rb | 29 - .../rewrite_strategies/deviant_art.rb | 53 -- .../downloads/rewrite_strategies/moebooru.rb | 26 - .../rewrite_strategies/nico_seiga.rb | 66 -- .../downloads/rewrite_strategies/nijie.rb | 40 -- .../downloads/rewrite_strategies/pawoo.rb | 17 - .../downloads/rewrite_strategies/pixiv.rb | 127 ---- .../downloads/rewrite_strategies/tumblr.rb | 70 -- .../downloads/rewrite_strategies/twitpic.rb | 36 - .../downloads/rewrite_strategies/twitter.rb | 40 -- app/logical/image_proxy.rb | 4 +- app/logical/iqdb/download.rb | 6 +- app/logical/nico_seiga_api_client.rb | 6 +- app/logical/pawoo_api_client.rb | 37 +- app/logical/pixiv_api_client.rb | 224 ++++--- app/logical/sources/site.rb | 78 --- app/logical/sources/strategies.rb | 29 + app/logical/sources/strategies/art_station.rb | 165 ++++- app/logical/sources/strategies/base.rb | 156 ++++- app/logical/sources/strategies/deviant_art.rb | 236 ++++--- app/logical/sources/strategies/moebooru.rb | 35 + app/logical/sources/strategies/nico_seiga.rb | 302 ++++----- app/logical/sources/strategies/nijie.rb | 235 +++---- app/logical/sources/strategies/null.rb | 43 ++ app/logical/sources/strategies/pawoo.rb | 113 ++-- app/logical/sources/strategies/pixiv.rb | 614 +++++++++--------- app/logical/sources/strategies/tumblr.rb | 213 +++--- app/logical/sources/strategies/twitter.rb | 126 ++-- app/logical/twitter_service.rb | 23 +- app/logical/upload_service.rb | 6 +- .../upload_service/controller_helper.rb | 21 +- app/logical/upload_service/preprocessor.rb | 65 +- app/logical/upload_service/replacer.rb | 6 +- app/logical/upload_service/utils.rb | 36 +- app/models/artist.rb | 11 +- app/models/artist_url.rb | 16 +- app/models/post.rb | 10 +- app/models/upload.rb | 4 +- app/views/uploads/_image.html.erb | 6 +- app/views/uploads/index.html.erb | 7 +- app/views/uploads/new.html.erb | 2 - config/docker/compose.yml | 1 + ...0816230604_rename_alt_source_on_uploads.rb | 5 + db/structure.sql | 277 ++++---- lib/tasks/images.rake | 2 +- test/functional/uploads_controller_test.rb | 70 ++ test/models/upload_service_test.rb | 250 ++++--- test/test_helpers/download_test_helper.rb | 17 +- test/unit/artist_test.rb | 7 - test/unit/artist_url_test.rb | 64 +- test/unit/downloads/art_station_test.rb | 27 +- test/unit/downloads/deviant_art_test.rb | 6 +- test/unit/downloads/file_test.rb | 2 +- test/unit/downloads/pixiv_test.rb | 68 +- test/unit/downloads/tumblr_test.rb | 80 +-- test/unit/downloads/twitter_test.rb | 15 +- test/unit/post_replacement_test.rb | 37 -- test/unit/post_test.rb | 20 - test/unit/sources/art_station_test.rb | 15 +- test/unit/sources/deviantart_test.rb | 23 +- test/unit/sources/nico_seiga_test.rb | 14 +- test/unit/sources/nijie_test.rb | 26 +- test/unit/sources/pawoo_test.rb | 9 +- test/unit/sources/pixiv_test.rb | 30 +- test/unit/sources/tumblr_test.rb | 48 +- test/unit/sources/twitter_test.rb | 143 ++-- test/unit/tag_alias_correction_test.rb | 1 + 71 files changed, 2340 insertions(+), 2430 deletions(-) delete mode 100644 app/logical/downloads/rewrite_strategies/art_station.rb delete mode 100644 app/logical/downloads/rewrite_strategies/base.rb delete mode 100644 app/logical/downloads/rewrite_strategies/deviant_art.rb delete mode 100644 app/logical/downloads/rewrite_strategies/moebooru.rb delete mode 100644 app/logical/downloads/rewrite_strategies/nico_seiga.rb delete mode 100644 app/logical/downloads/rewrite_strategies/nijie.rb delete mode 100644 app/logical/downloads/rewrite_strategies/pawoo.rb delete mode 100644 app/logical/downloads/rewrite_strategies/pixiv.rb delete mode 100644 app/logical/downloads/rewrite_strategies/tumblr.rb delete mode 100644 app/logical/downloads/rewrite_strategies/twitpic.rb delete mode 100644 app/logical/downloads/rewrite_strategies/twitter.rb delete mode 100644 app/logical/sources/site.rb create mode 100644 app/logical/sources/strategies.rb create mode 100644 app/logical/sources/strategies/moebooru.rb create mode 100644 app/logical/sources/strategies/null.rb create mode 100644 db/migrate/20180816230604_rename_alt_source_on_uploads.rb delete mode 100644 test/unit/post_replacement_test.rb diff --git a/app/controllers/sources_controller.rb b/app/controllers/sources_controller.rb index 7c13fc50d..a452134e3 100644 --- a/app/controllers/sources_controller.rb +++ b/app/controllers/sources_controller.rb @@ -1,20 +1,12 @@ class SourcesController < ApplicationController respond_to :json, :xml - rescue_from Sources::Site::NoStrategyError, :with => :no_strategy def show - @source = Sources::Site.new(params[:url], :referer_url => params[:ref]) - @source.get + @source = Sources::Strategies.find(params[:url], params[:ref]) respond_with(@source.to_h) do |format| format.xml { render xml: @source.to_h.to_xml(root: "source") } format.json { render json: @source.to_h.to_json } end end - -protected - - def no_strategy - render json: {message: "Unsupported site"}.to_json, status: 400 - end end diff --git a/app/controllers/uploads_controller.rb b/app/controllers/uploads_controller.rb index e6afe9dbf..2030e6a54 100644 --- a/app/controllers/uploads_controller.rb +++ b/app/controllers/uploads_controller.rb @@ -5,7 +5,7 @@ class UploadsController < ApplicationController def new @upload_notice_wiki = WikiPage.titled(Danbooru.config.upload_notice_wiki_page).first - @upload, @post, @source, @normalized_url, @remote_size = UploadService::ControllerHelper.prepare( + @upload, @post, @source, @remote_size = UploadService::ControllerHelper.prepare( url: params[:url], ref: params[:ref] ) respond_with(@upload) @@ -43,7 +43,7 @@ class UploadsController < ApplicationController end def preprocess - @upload, @post, @source, @normalized_url, @remote_size = UploadService::ControllerHelper.prepare( + @upload, @post, @source, @remote_size = UploadService::ControllerHelper.prepare( url: params[:url], file: params[:file], ref: params[:ref] ) render body: nil diff --git a/app/logical/downloads/file.rb b/app/logical/downloads/file.rb index 66e2d2798..c8bbc0b78 100644 --- a/app/logical/downloads/file.rb +++ b/app/logical/downloads/file.rb @@ -3,16 +3,33 @@ module Downloads class Error < Exception ; end attr_reader :data, :options - attr_accessor :source, :original_source, :downloaded_source + attr_accessor :source, :referer - def initialize(source, options = {}) + # Prevent Cloudflare from potentially mangling the image. See issue #3528. + def self.uncached_url(url, headers = {}) + url = Addressable::URI.parse(url) + + if is_cloudflare?(url, headers) + url.query_values = (url.query_values || {}).merge(danbooru_no_cache: SecureRandom.uuid) + end + + url + end + + def self.is_cloudflare?(url, headers = {}) + Cache.get("is_cloudflare:#{url.origin}", 4.hours) do + res = HTTParty.head(url, { headers: headers }.deep_merge(Danbooru.config.httparty_options)) + raise Error.new("HTTP error code: #{res.code} #{res.message}") unless res.success? + + res.key?("CF-Ray") + end + end + + def initialize(source, referer=nil, options = {}) # source can potentially get rewritten in the course # of downloading a file, so check it again @source = source - @original_source = source - - # the URL actually downloaded after rewriting the original source. - @downloaded_source = nil + @referer = referer # we sometimes need to capture data from the source page @data = {} @@ -22,48 +39,31 @@ module Downloads @data[:get_thumbnail] = options[:get_thumbnail] end - def rewrite_url - url, _, _ = before_download(@source, @data) - return url - end - def size - url, headers, _ = before_download(@source, @data) - options = { timeout: 3, headers: headers }.deep_merge(Danbooru.config.httparty_options) - res = HTTParty.head(url, options) - res.content_length + strategy = Sources::Strategies.find(source, referer) + options = { timeout: 3, headers: strategy.headers }.deep_merge(Danbooru.config.httparty_options) + + res = HTTParty.head(strategy.file_url, options) + + if res.success? + res.content_length + else + raise HTTParty::ResponseError.new(res) + end end def download! - url, headers, @data = before_download(@source, @data) - + strategy = Sources::Strategies.find(source, referer) output_file = Tempfile.new(binmode: true) - http_get_streaming(uncached_url(url, headers), output_file, headers) + @data = strategy.data - @downloaded_source = url - @source = after_download(url) + http_get_streaming( + self.class.uncached_url(strategy.file_url, strategy.headers), + output_file, + strategy.headers + ) - output_file - end - - def before_download(url, datums) - original_url = url - headers = Danbooru.config.http_headers - - RewriteStrategies::Base.strategies.each do |strategy| - url, headers, datums = strategy.new(url).rewrite(url, headers, datums) - url = original_url if url.nil? - end - - return [url, headers, datums] - end - - def after_download(src) - src = fix_twitter_sources(src) - if options[:referer_url].present? - src = set_source_to_referer(src, options[:referer_url]) - end - src + [output_file, strategy] end def validate_local_hosts(url) @@ -111,50 +111,5 @@ module Downloads end end # while end # def - - def fix_twitter_sources(src) - if src =~ %r!^https?://(?:video|pbs)\.twimg\.com/! && original_source =~ %r!^https?://twitter\.com/! - original_source - elsif src =~ %r!^https?://img\.pawoo\.net/! && original_source =~ %r!^https?://pawoo\.net/! - original_source - else - src - end - end - - def set_source_to_referer(src, referer) - if Sources::Strategies::Nijie.url_match?(src) || - Sources::Strategies::Twitter.url_match?(src) || Sources::Strategies::Twitter.url_match?(referer) || - Sources::Strategies::Pawoo.url_match?(src) || - Sources::Strategies::Tumblr.url_match?(src) || Sources::Strategies::Tumblr.url_match?(referer) || - Sources::Strategies::ArtStation.url_match?(src) || Sources::Strategies::ArtStation.url_match?(referer) - strategy = Sources::Site.new(src, :referer_url => referer) - strategy.referer_url - else - src - end - end - - private - - # Prevent Cloudflare from potentially mangling the image. See issue #3528. - def uncached_url(url, headers = {}) - url = Addressable::URI.parse(url) - - if is_cloudflare?(url, headers) - url.query_values = (url.query_values || {}).merge(danbooru_no_cache: SecureRandom.uuid) - end - - url - end - - def is_cloudflare?(url, headers = {}) - Cache.get("is_cloudflare:#{url.origin}", 4.hours) do - res = HTTParty.head(url, { headers: headers }.deep_merge(Danbooru.config.httparty_options)) - raise Error.new("HTTP error code: #{res.code} #{res.message}") unless res.success? - - res.key?("CF-Ray") - end - end end end diff --git a/app/logical/downloads/rewrite_strategies/art_station.rb b/app/logical/downloads/rewrite_strategies/art_station.rb deleted file mode 100644 index 5ded4faf1..000000000 --- a/app/logical/downloads/rewrite_strategies/art_station.rb +++ /dev/null @@ -1,33 +0,0 @@ -module Downloads - module RewriteStrategies - class ArtStation < Base - def rewrite(url, headers, data = {}) - # example: https://cdnb3.artstation.com/p/assets/images/images/003/716/071/large/aoi-ogata-hate-city.jpg?1476754974 - if url =~ %r!^https?://cdn\w*\.artstation\.com/p/assets/images/images/\d+/\d+/\d+/(?:medium|small|large)/! - original_url, headers = rewrite_large_url(url, headers) - if http_exists?(original_url, headers) - url = original_url - end - else - url, headers = rewrite_html_url(url, headers) - end - - return [url, headers, data] - end - - protected - def rewrite_html_url(url, headers) - return [url, headers] unless Sources::Strategies::ArtStation.url_match?(url) - source = Sources::Site.new(url) - source.get - [source.image_url, headers] - end - - def rewrite_large_url(url, headers) - # example: https://cdnb3.artstation.com/p/assets/images/images/003/716/071/original/aoi-ogata-hate-city.jpg?1476754974 - url = url.sub(%r!/(?:medium|small|large)/!, "/original/") - return [url, headers] - end - end - end -end diff --git a/app/logical/downloads/rewrite_strategies/base.rb b/app/logical/downloads/rewrite_strategies/base.rb deleted file mode 100644 index d1219397d..000000000 --- a/app/logical/downloads/rewrite_strategies/base.rb +++ /dev/null @@ -1,29 +0,0 @@ -# This is a collection of strategies for normalizing URLs. Most strategies -# typically work by parsing and rewriting the URL itself, but some strategies -# may delegate to Sources::Strategies to obtain a more canonical URL. - -module Downloads - module RewriteStrategies - class Base - attr_reader :url - - def initialize(url = nil) - @url = url - end - - def self.strategies - [Downloads::RewriteStrategies::Pixiv, Downloads::RewriteStrategies::NicoSeiga, Downloads::RewriteStrategies::ArtStation, Downloads::RewriteStrategies::Twitpic, Downloads::RewriteStrategies::DeviantArt, Downloads::RewriteStrategies::Tumblr, Downloads::RewriteStrategies::Moebooru, Downloads::RewriteStrategies::Twitter, Downloads::RewriteStrategies::Nijie, Downloads::RewriteStrategies::Pawoo] - end - - def rewrite(url, headers, data = {}) - return [url, headers, data] - end - - protected - def http_exists?(url, headers) - res = HTTParty.head(url, Danbooru.config.httparty_options.deep_merge(headers: headers)) - res.success? - end - end - end -end diff --git a/app/logical/downloads/rewrite_strategies/deviant_art.rb b/app/logical/downloads/rewrite_strategies/deviant_art.rb deleted file mode 100644 index 4c2411b93..000000000 --- a/app/logical/downloads/rewrite_strategies/deviant_art.rb +++ /dev/null @@ -1,53 +0,0 @@ -module Downloads - module RewriteStrategies - class DeviantArt < Base - attr_accessor :url, :source - - def initialize(url) - @url = url - end - - def rewrite(url, headers, data = {}) - if url =~ %r{deviantart\.com/art/} || url =~ %r{\Ahttps?://www\.deviantart\.com/([^/]+)/art/} || url =~ %r{deviantart\.net/.+/[a-z0-9_]+(_by_[a-z0-9_]+)?-d([a-z0-9]+)\.}i - url, headers = rewrite_html_pages(url, headers) - url, headers = rewrite_thumbnails(url, headers) - end - - return [url, headers, data] - end - - protected - def rewrite_html_pages(url, headers) - if url =~ %r{^https?://.+?\.deviantart\.com/art/} || url =~ %r{\Ahttps?://www\.deviantart\.com/([^/]+)/art/} - return [source.image_url, headers] - else - return [url, headers] - end - end - - def rewrite_thumbnails(url, headers) - if url =~ %r{^(https?://(?:fc|th)\d{2}\.deviantart\.net/.+?/)200H/} - match = $1 - url.sub!(match + "200H/", match) - elsif url =~ %r{^(https?://(?:fc|th)\d{2}\.deviantart\.net/.+?/)PRE/} - match = $1 - url.sub!(match + "PRE/", match) - elsif url =~ %r{^https?://(?:pre|img)\d{2}\.deviantart\.net/} - return [source.image_url, headers] - end - - return [url, headers] - end - - # Cache the source data so it gets fetched at most once. - def source - @source ||= begin - source = ::Sources::Strategies::DeviantArt.new(url) - source.get - - source - end - end - end - end -end diff --git a/app/logical/downloads/rewrite_strategies/moebooru.rb b/app/logical/downloads/rewrite_strategies/moebooru.rb deleted file mode 100644 index 6338f1dee..000000000 --- a/app/logical/downloads/rewrite_strategies/moebooru.rb +++ /dev/null @@ -1,26 +0,0 @@ -module Downloads - module RewriteStrategies - class Moebooru < Base - DOMAINS = '(?:[^.]+\.)?yande\.re|konachan\.com' - - def rewrite(url, headers, data = {}) - if url =~ %r{https?://(?:#{DOMAINS})} - url, headers = rewrite_jpeg_versions(url, headers) - end - - return [url, headers, data] - end - - protected - def rewrite_jpeg_versions(url, headers) - # example: https://yande.re/jpeg/2c6876ac2317fce617e3c5f1a642123b/yande.re%20292092%20hatsune_miku%20tid%20vocaloid.jpg - - if url =~ %r{\A(https?://(?:#{DOMAINS}))/jpeg/([a-f0-9]+(?:/.*)?)\.jpg\Z} - url = $1 + "/image/" + $2 + ".png" - end - - return [url, headers] - end - end - end -end diff --git a/app/logical/downloads/rewrite_strategies/nico_seiga.rb b/app/logical/downloads/rewrite_strategies/nico_seiga.rb deleted file mode 100644 index aa6b08740..000000000 --- a/app/logical/downloads/rewrite_strategies/nico_seiga.rb +++ /dev/null @@ -1,66 +0,0 @@ -module Downloads - module RewriteStrategies - class NicoSeiga < Base - attr_accessor :url, :source - - def initialize(url) - @url = url - end - - def rewrite(url, headers, data = {}) - if url =~ %r{https?://lohas\.nicoseiga\.jp} || url =~ %r{https?://seiga\.nicovideo\.jp} - url, headers = rewrite_headers(url, headers) - url, headers = rewrite_html_pages(url, headers) - url, headers = rewrite_thumbnails(url, headers) - url, headers = rewrite_view_big_pages(url, headers) - end - - return [url, headers, data] - end - - protected - def rewrite_headers(url, headers) - headers["Referer"] = "http://seiga.nicovideo.jp" - return [url, headers] - end - - def rewrite_html_pages(url, headers) - # example: http://seiga.nicovideo.jp/seiga/im1389842 - - if url =~ %r{https?://seiga\.nicovideo\.jp/seiga/im\d+} - return [source.image_url, headers] - else - return [url, headers] - end - end - - def rewrite_thumbnails(url, headers) - if url =~ %r{/thumb/\d+} - return [source.image_url, headers] - end - - return [url, headers] - end - - def rewrite_view_big_pages(url, headers) - # example: http://lohas.nicoseiga.jp/o/40aeedd2848a7780b6046747e75b3566b423a10c/1436307639/5026559 - - if url =~ %r{http://lohas\.nicoseiga\.jp/o/} - return [source.image_url, headers] - else - return [url, headers] - end - end - - # Cache the source data so it gets fetched at most once. - def source - @source ||= begin - source = ::Sources::Strategies::NicoSeiga.new(url) - source.get - - source - end - end - end - end -end diff --git a/app/logical/downloads/rewrite_strategies/nijie.rb b/app/logical/downloads/rewrite_strategies/nijie.rb deleted file mode 100644 index ecd14e52d..000000000 --- a/app/logical/downloads/rewrite_strategies/nijie.rb +++ /dev/null @@ -1,40 +0,0 @@ -module Downloads - module RewriteStrategies - class Nijie < Base - attr_accessor :url, :source - - def initialize(url) - @url = url - end - - def rewrite(url, headers, data = {}) - if url =~ %r{https?://nijie\.info\/view\.php.+id=\d+} - url, headers = rewrite_html_pages(url, headers) - end - - return [url, headers, data] - end - - protected - def rewrite_html_pages(url, headers) - # example: http://nijie.info/view.php?id=151126 - - if url =~ %r{https?://nijie\.info\/view\.php.+id=\d+} - return [source.image_url, headers] - else - return [url, headers] - end - end - - # Cache the source data so it gets fetched at most once. - def source - @source ||= begin - source = ::Sources::Strategies::Nijie.new(url) - source.get - - source - end - end - end - end -end diff --git a/app/logical/downloads/rewrite_strategies/pawoo.rb b/app/logical/downloads/rewrite_strategies/pawoo.rb deleted file mode 100644 index 3a0e3690e..000000000 --- a/app/logical/downloads/rewrite_strategies/pawoo.rb +++ /dev/null @@ -1,17 +0,0 @@ -module Downloads - module RewriteStrategies - class Pawoo < Base - def rewrite(url, headers, data = {}) - if Sources::Strategies::Pawoo.url_match?(url) - source = Sources::Strategies::Pawoo.new(url) - source.get - url = source.image_url - elsif url =~ %r!\Ahttps?://img\.pawoo\.net/media_attachments/files/(\d+/\d+/\d+)/small/([a-z0-9]+\.\w+)\z!i - url = "https://img.pawoo.net/media_attachments/files/#{$1}/original/#{$2}" - end - - return [url, headers, data] - end - end - end -end diff --git a/app/logical/downloads/rewrite_strategies/pixiv.rb b/app/logical/downloads/rewrite_strategies/pixiv.rb deleted file mode 100644 index ec897695f..000000000 --- a/app/logical/downloads/rewrite_strategies/pixiv.rb +++ /dev/null @@ -1,127 +0,0 @@ -module Downloads - module RewriteStrategies - class Pixiv < Base - attr_accessor :url, :source - - def initialize(url) - @url = url - end - - def rewrite(url, headers, data = {}) - if url =~ /\Ahttps?:\/\/(?:\w+\.)?pixiv\.net/ || url =~ /\Ahttps?:\/\/i\.pximg\.net/ - url, headers = rewrite_headers(url, headers) - url, headers = rewrite_cdn(url, headers) - end - - if (url =~ /\Ahttps?:\/\/(?:\w+\.)?pixiv\.net/ || url =~ /\Ahttps?:\/\/i\.pximg\.net/) && source.illust_id_from_url - url, headers = rewrite_html_pages(url, headers) - url, headers = rewrite_thumbnails(url, headers) - url, headers = rewrite_old_small_manga_pages(url, headers) - url, headers = rewrite_to_thumbnails(url, headers) if data.delete(:get_thumbnail) - end - - # http://i2.pixiv.net/img-zip-ugoira/img/2014/08/05/06/01/10/44524589_ugoira1920x1080.zip - if url =~ %r!\Ahttps?://(i\d+\.pixiv|i\.pximg)\.net/img-zip-ugoira/img/\d{4}/\d{2}/\d{2}/\d{2}/\d{2}/\d{2}/\d+_ugoira\d+x\d+\.zip\z!i - data[:is_ugoira] = true - data[:ugoira_frame_data] = source.ugoira_frame_data - data[:ugoira_content_type] = source.ugoira_content_type - end - - return [url, headers, data] - rescue PixivApiClient::BadIDError, Sources::Site::NoStrategyError - return [url, headers, data] - end - - protected - def rewrite_to_thumbnails(url, headers) - if url =~ %r!https?://(i\d+)\.pixiv\.net/img-zip-ugoira/img/(\d{4}/\d{2}/\d{2}/\d{2}/\d{2}/\d{2})/(\d+)_ugoira\d+x\d+\.zip! - url = "http://#{$1}.pixiv.net/c/150x150/img-master/img/#{$2}/#{$3}_master1200.jpg" - - elsif url =~ %r!https?://i\.pximg\.net/img-zip-ugoira/img/(\d{4}/\d{2}/\d{2}/\d{2}/\d{2}/\d{2})/(\d+)_ugoira\d+x\d+\.zip! - url = "http://i.pximg.net/c/150x150/img-master/img/#{$1}/#{$2}_master1200.jpg" - - elsif url =~ %r!https?://(i\d+)\.pixiv\.net/img-original/img/(\d{4}/\d{2}/\d{2}/\d{2}/\d{2}/\d{2})/(\d+_p\d+)\.! - url = "http://#{$1}.pixiv.net/c/150x150/img-master/img/#{$2}/#{$3}_master1200.jpg" - - elsif url =~ %r!https?://i\.pximg\.net/img-original/img/(\d{4}/\d{2}/\d{2}/\d{2}/\d{2}/\d{2})/(\d+_p\d+)\.! - url = "http://i.pximg.net/c/150x150/img-master/img/#{$1}/#{$2}_master1200.jpg" - - elsif url =~ %r!https?://(i\d+)\.pixiv\.net/img(\d+)/img/(.+?)/(\d+)\.! - url = "http://#{$1}.pixiv.net/img#{$2}/img/#{$3}/mobile/#{$4}_240mw.jpg" - - elsif url =~ %r!https?://i\.pximg\.net/img(\d+)/img/(.+?)/(\d+)\.! - url = "http://#{$1}.pixiv.net/img#{$2}/img/#{$3}/mobile/#{$4}_240mw.jpg" - end - - return [url, headers] - end - - def rewrite_headers(url, headers) - headers["Referer"] = "http://www.pixiv.net" - return [url, headers] - end - - # Rewrite these: - # http://www.pixiv.net/i/18557054 - # http://www.pixiv.net/member_illust.php?mode=medium&illust_id=18557054 - # http://www.pixiv.net/member_illust.php?mode=big&illust_id=18557054 - # http://www.pixiv.net/member_illust.php?mode=manga&illust_id=18557054 - # http://www.pixiv.net/member_illust.php?mode=manga_big&illust_id=18557054&page=1 - # Plus this: - # i2.pixiv.net/img-inf/img/2014/09/25/00/57/24/46170939_64x64.jpg - def rewrite_html_pages(url, headers) - if url =~ /illust_id=\d+/i || url =~ %r!pixiv\.net/img-inf/img/!i - return [source.file_url, headers] - else - return [url, headers] - end - end - - # Rewrite these: - # http://i2.pixiv.net/img04/img/syounen_no_uta/46170939_m.jpg - # http://i1.pixiv.net/c/600x600/img-master/img/2014/09/24/23/25/08/46168376_p0_master1200.jpg - def rewrite_thumbnails(url, headers) - url = source.rewrite_thumbnails(url) - return [url, headers] - end - - # Rewrite these: - # http://i2.pixiv.net/img04/img/syounen_no_uta/46170939_p0.jpg - # http://img04.pixiv.net/img/syounen_no_uta/46170939_p0.jpg - # but not these: - # http://i2.pixiv.net/img04/img/syounen_no_uta/46170939_big_p0.jpg - # http://i1.pixiv.net/c/600x600/img-master/img/2014/09/24/23/25/08/46168376_p0_master1200.jpg - # http://i1.pixiv.net/img-original/img/2014/09/25/23/09/29/46183440_p0.jpg - def rewrite_old_small_manga_pages(url, headers) - if url !~ %r!/img-(?:original|master)/img/!i && url =~ %r!/(\d+_p\d+)\.!i - match = $1 - repl = match.sub(/_p/, "_big_p") - big_url = url.sub(match, repl) - if http_exists?(big_url, headers) - url = big_url - end - end - - return [url, headers] - end - - def rewrite_cdn(url, headers) - if url =~ %r{https?:\/\/(?:\w+\.)?pixiv\.net\.edgesuite\.net} - url = url.sub(".edgesuite.net", "") - end - - return [url, headers] - end - - # Cache the source data so it gets fetched at most once. - def source - @source ||= begin - source = ::Sources::Site.new(url) - source.get - - source - end - end - end - end -end diff --git a/app/logical/downloads/rewrite_strategies/tumblr.rb b/app/logical/downloads/rewrite_strategies/tumblr.rb deleted file mode 100644 index 6fa87c78c..000000000 --- a/app/logical/downloads/rewrite_strategies/tumblr.rb +++ /dev/null @@ -1,70 +0,0 @@ -module Downloads - module RewriteStrategies - DOMAIN = '(data|(\d+\.)?media)\.tumblr\.com' - MD5 = '(?[0-9a-f]{32})' - FILENAME = '(?(tumblr_(inline_)?)?[a-z0-9]+(_r[0-9]+)?)' - SIZES = '(250|400|500|500h|540|1280|raw)' - EXT = '(?\w+)' - - class Tumblr < Base - def rewrite(url, headers, data = {}) - url = rewrite_cdn(url) - url = rewrite_samples(url, headers) - url = rewrite_html_pages(url) - - return [url, headers, data] - end - - protected - # Look for the biggest available version on data.tumblr.com. A bigger - # version may or may not exist. - # - # http://40.media.tumblr.com/d8c6d49785c0842ee31ff26c010b7445/tumblr_naypopLln51tkufhoo2_500h.png - # => http://data.tumblr.com/d8c6d49785c0842ee31ff26c010b7445/tumblr_naypopLln51tkufhoo2_raw.png - # - # https://40.media.tumblr.com/de018501416a465d898d24ad81d76358/tumblr_nfxt7voWDX1rsd4umo1_r23_1280.jpg - # => http://data.tumblr.com/de018501416a465d898d24ad81d76358/tumblr_nfxt7voWDX1rsd4umo1_r23_raw.jpg - # - # https://68.media.tumblr.com/ee02048f5578595badc95905e17154b4/tumblr_inline_ofbr4452601sk4jd9_250.gif - # => http://data.tumblr.com/ee02048f5578595badc95905e17154b4/tumblr_inline_ofbr4452601sk4jd9_500.gif - # - # https://25.media.tumblr.com/tumblr_m2dxb8aOJi1rop2v0o1_500.png - # => http://data.tumblr.com/tumblr_m2dxb8aOJi1rop2v0o1_1280.png - # - # http://data.tumblr.com/0DNBGJovY5j3smfeQs8nB53z_400.jpg - # => http://data.tumblr.com/0DNBGJovY5j3smfeQs8nB53z_500.jpg - # - # http://data.tumblr.com/tumblr_m24kbxqKAX1rszquso1_1280.jpg - # => http://data.tumblr.com/tumblr_m24kbxqKAX1rszquso1_1280.jpg - def rewrite_samples(url, headers) - if url =~ %r!\Ahttps?://#{DOMAIN}/(?#{MD5}/)?#{FILENAME}_#{SIZES}\.#{EXT}\z!i - sizes = ["raw", 1280, 640, 540, "500h", 500, 400, 250] - candidates = sizes.map do |size| - "http://data.tumblr.com/#{$~[:dir]}#{$~[:filename]}_#{size}.#{$~[:ext]}" - end - - url = candidates.find do |candidate| - http_exists?(candidate, headers) - end - end - - url - end - - # https://gs1.wac.edgecastcdn.net/8019B6/data.tumblr.com/tumblr_m2dxb8aOJi1rop2v0o1_500.png - # => http://data.tumblr.com/tumblr_m2dxb8aOJi1rop2v0o1_500.png - def rewrite_cdn(url) - url.sub!(%r!\Ahttps?://gs1\.wac\.edgecastcdn\.net/8019B6/data\.tumblr\.com!i, "http://data.tumblr.com") - url - end - - def rewrite_html_pages(url) - if Sources::Strategies::Tumblr.url_match?(url) - url = Sources::Strategies::Tumblr.new(url).image_url - end - - url - end - end - end -end diff --git a/app/logical/downloads/rewrite_strategies/twitpic.rb b/app/logical/downloads/rewrite_strategies/twitpic.rb deleted file mode 100644 index b8a07195e..000000000 --- a/app/logical/downloads/rewrite_strategies/twitpic.rb +++ /dev/null @@ -1,36 +0,0 @@ -module Downloads - module RewriteStrategies - class Twitpic < Base - def rewrite(url, headers, data = {}) - if url =~ %r{https?://twitpic\.com} || url =~ %r{^https?://(?:d3j5vwomefv46c|dn3pm25xmtlyu)\.cloudfront\.net} - url, headers = rewrite_html_pages(url, headers) - url, headers = rewrite_thumbnails(url, headers) - end - - return [url, headers, data] - end - - protected - def rewrite_html_pages(url, headers) - # example: http://twitpic.com/cpprns - - if url =~ %r{https?://twitpic\.com/([a-z0-9]+)$} - id = $1 - url = "http://twitpic.com/show/full/#{id}" - return [url, headers] - else - return [url, headers] - end - end - - def rewrite_thumbnails(url, headers) - if url =~ %r{^https?://(?:d3j5vwomefv46c|dn3pm25xmtlyu)\.cloudfront\.net/photos/thumb/(\d+\..+)$} - match = $1 - url.sub!("/thumb/" + match, "/large/" + match) - end - - return [url, headers] - end - end - end -end diff --git a/app/logical/downloads/rewrite_strategies/twitter.rb b/app/logical/downloads/rewrite_strategies/twitter.rb deleted file mode 100644 index c3c56725c..000000000 --- a/app/logical/downloads/rewrite_strategies/twitter.rb +++ /dev/null @@ -1,40 +0,0 @@ -module Downloads - module RewriteStrategies - class Twitter < Base - attr_accessor :url, :source - - def initialize(url) - @url = url - end - - def rewrite(url, headers, data = {}) - if url =~ %r!^https?://(?:mobile\.)?twitter\.com! - url = source.image_url - elsif url =~ %r{^https?://pbs\.twimg\.com} - url, headers = rewrite_thumbnails(url, headers, data) - end - - return [url, headers, data] - end - - protected - def rewrite_thumbnails(url, headers, data) - if url =~ %r{^(https?://pbs\.twimg\.com/media/[^:]+)} - url = $1 + ":orig" - end - - return [url, headers] - end - - # Cache the source data so it gets fetched at most once. - def source - @source ||= begin - source = ::Sources::Strategies::Twitter.new(url) - source.get - - source - end - end - end - end -end diff --git a/app/logical/image_proxy.rb b/app/logical/image_proxy.rb index 5b05627d2..f10fd53c4 100644 --- a/app/logical/image_proxy.rb +++ b/app/logical/image_proxy.rb @@ -1,12 +1,10 @@ class ImageProxy def self.needs_proxy?(url) fake_referer_for(url).present? - rescue Sources::Site::NoStrategyError - false end def self.fake_referer_for(url) - Sources::Site.new(url).strategy.try(:fake_referer) + Sources::Strategies.find(url).headers["Referer"] end def self.get_image(url) diff --git a/app/logical/iqdb/download.rb b/app/logical/iqdb/download.rb index bdb74ae55..200f17572 100644 --- a/app/logical/iqdb/download.rb +++ b/app/logical/iqdb/download.rb @@ -10,11 +10,9 @@ module Iqdb headers = {} datums = {} - Downloads::RewriteStrategies::Base.strategies.each do |strategy| - url, headers, datums = strategy.new(url).rewrite(url, headers, datums) - end + strategy = Sources::Strategies.find(url) - [url, headers["Referer"]] + [strategy.image_url, strategy.headers["Referer"]] end def self.find_similar(source) diff --git a/app/logical/nico_seiga_api_client.rb b/app/logical/nico_seiga_api_client.rb index a29b0b5f7..73af88bcd 100644 --- a/app/logical/nico_seiga_api_client.rb +++ b/app/logical/nico_seiga_api_client.rb @@ -12,6 +12,8 @@ class NicoSeigaApiClient resp = HTTParty.get(uri, Danbooru.config.httparty_options) if resp.success? parse_illust_xml_response(resp.body) + else + raise HTTParty::ResponseError.new(resp) end end @@ -20,6 +22,8 @@ class NicoSeigaApiClient resp = HTTParty.get(uri, Danbooru.config.httparty_options) if resp.success? parse_artist_xml_response(resp.body) + else + raise HTTParty::ResponseError.new(resp) end end @@ -34,6 +38,6 @@ class NicoSeigaApiClient @image_id = image["id"].to_i @user_id = image["user_id"].to_i @title = image["title"] - @desc = image["description"] + @desc = image["description"] || image["summary"] end end diff --git a/app/logical/pawoo_api_client.rb b/app/logical/pawoo_api_client.rb index 542911a4f..7803ba2ca 100644 --- a/app/logical/pawoo_api_client.rb +++ b/app/logical/pawoo_api_client.rb @@ -1,14 +1,26 @@ class PawooApiClient extend Memoist + PROFILE1 = %r!\Ahttps?://pawoo\.net/web/accounts/(\d+)! + PROFILE2 = %r!\Ahttps?://pawoo\.net/@([^/]+)! + STATUS1 = %r!\Ahttps?://pawoo\.net/web/statuses/(\d+)! + STATUS2 = %r!\Ahttps?://pawoo\.net/@.+?/([^/]+)! + class MissingConfigurationError < Exception ; end class Account attr_reader :json def self.is_match?(url) - url =~ %r!https?://pawoo.net/web/accounts/(\d+)! - $1 + if url =~ PROFILE1 + return $1 + end + + if url =~ PROFILE2 + return $1 + end + + false end def initialize(json) @@ -44,8 +56,15 @@ class PawooApiClient attr_reader :json def self.is_match?(url) - url =~ %r!https?://pawoo.net/web/statuses/(\d+)! || url =~ %r!https?://pawoo.net/@.+?/(\d+)! - $1 + if url =~ STATUS1 + return $1 + end + + if url =~ STATUS2 + return $1 + end + + false end def initialize(json) @@ -82,11 +101,11 @@ class PawooApiClient def get(url) if id = Status.is_match?(url) - Status.new(JSON.parse(access_token.get("/api/v1/statuses/#{id}").body)) - elsif id = Account.is_match?(url) - Account.new(JSON.parse(access_token.get("/api/v1/accounts/#{id}").body)) - else - nil + return Status.new(JSON.parse(access_token.get("/api/v1/statuses/#{id}").body)) + end + + if id = Account.is_match?(url) + return Account.new(JSON.parse(access_token.get("/api/v1/accounts/#{id}").body)) end end diff --git a/app/logical/pixiv_api_client.rb b/app/logical/pixiv_api_client.rb index 00ba4b90a..41cbc2d37 100644 --- a/app/logical/pixiv_api_client.rb +++ b/app/logical/pixiv_api_client.rb @@ -1,6 +1,8 @@ require 'resolv-replace' class PixivApiClient + extend Memoist + API_VERSION = "1" CLIENT_ID = "bYGKuGVw91e0NMfPGp44euvGt59s" CLIENT_SECRET = "HP3RmkgAmEGro0gn1x9ioawQE8WMfvLXDz3ZqxpK" @@ -23,90 +25,11 @@ class PixivApiClient class Error < Exception ; end class BadIDError < Error ; end - class WorksResponse + class WorkResponse attr_reader :json, :pages, :name, :moniker, :user_id, :page_count, :tags attr_reader :artist_commentary_title, :artist_commentary_desc def initialize(json) - # Sample response: - # { - # "status": "success", - # "response": [ - # { - # "id": 49270482, - # "title": "ツイログ", - # "caption": null, - # "tags": [ - # "神崎蘭子", - # "双葉杏", - # "アイドルマスターシンデレラガールズ", - # "Star!!", - # "アイマス5000users入り" - # ], - # "tools": [ - # "CLIP STUDIO PAINT" - # ], - # "image_urls": { - # "large": "http://i3.pixiv.net/img-original/img/2015/03/14/17/53/32/49270482_p0.jpg" - # }, - # "width": 1200, - # "height": 951, - # "stats": { - # "scored_count": 8247, - # "score": 81697, - # "views_count": 191630, - # "favorited_count": { - # "public": 7804, - # "private": 745 - # }, - # "commented_count": 182 - # }, - # "publicity": 0, - # "age_limit": "all-age", - # "created_time": "2015-03-14 17:53:32", - # "reuploaded_time": "2015-03-14 17:53:32", - # "user": { - # "id": 341433, - # "account": "nardack", - # "name": "Nardack", - # "is_following": false, - # "is_follower": false, - # "is_friend": false, - # "is_premium": null, - # "profile_image_urls": { - # "px_50x50": "http://i1.pixiv.net/img19/profile/nardack/846482_s.jpg" - # }, - # "stats": null, - # "profile": null - # }, - # "is_manga": true, - # "is_liked": false, - # "favorite_id": 0, - # "page_count": 2, - # "book_style": "none", - # "type": "illustration", - # "metadata": { - # "pages": [ - # { - # "image_urls": { - # "large": "http://i3.pixiv.net/img-original/img/2015/03/14/17/53/32/49270482_p0.jpg", - # "medium": "http://i3.pixiv.net/c/1200x1200/img-master/img/2015/03/14/17/53/32/49270482_p0_master1200.jpg" - # } - # }, - # { - # "image_urls": { - # "large": "http://i3.pixiv.net/img-original/img/2015/03/14/17/53/32/49270482_p1.jpg", - # "medium": "http://i3.pixiv.net/c/1200x1200/img-master/img/2015/03/14/17/53/32/49270482_p1_master1200.jpg" - # } - # } - # ] - # }, - # "content_type": null - # } - # ], - # "count": 1 - # } - @json = json @name = json["user"]["name"] @user_id = json["user"]["id"] @@ -131,7 +54,105 @@ class PixivApiClient end end - def works(illust_id) + class NovelResponse + extend Memoist + + attr_reader :json + + def initialize(json) + @json = json + end + + def name + json["user"]["name"] + end + + def user_id + json["user"]["id"] + end + + def moniker + json["user"]["account"] + end + + def page_count + json["page_count"].to_i + end + + def artist_commentary_title + json["title"] + end + + def artist_commentary_desc + json["caption"] + end + + def tags + json["tags"] + end + + def pages + # ex: + # https://i.pximg.net/c/150x150_80/novel-cover-master/img/2017/07/27/23/14/17/8465454_80685d10e6df4d7d53ad347ddc18a36b_master1200.jpg (6096b) + # => + # https://i.pximg.net/novel-cover-original/img/2017/07/27/23/14/17/8465454_80685d10e6df4d7d53ad347ddc18a36b.jpg (532129b) + [find_original(json["image_urls"]["small"])] + end + memoize :pages + + public + PXIMG = %r!\Ahttps?://i\.pximg\.net/c/\d+x\d+_\d+/novel-cover-master/img/(?\d+/\d+/\d+/\d+/\d+/\d+)/(?\d+_[a-f0-9]+)_master\d+\.(?jpg|jpeg|png|gif)!i + + def find_original(x) + if x =~ PXIMG + return "https://i.pximg.net/novel-cover-original/img/#{$~[:timestamp]}/#{$~[:filename]}.#{$~[:ext]}" + end + + return x + end + end + + class FanboxResponse + attr_reader :json + + def initialize(json) + @json = json + end + + def name + json["body"]["user"]["name"] + end + + def user_id + json["body"]["user"]["userId"] + end + + def moniker + raise NotImplementedError + end + + def page_count + json["body"]["body"]["images"].size + end + + def artist_commentary_title + json["body"]["title"] + end + + def artist_commentary_desc + json["body"]["body"]["text"] + end + + def tags + [] + end + + def pages + json["body"]["body"]["images"].map {|x| x["originalUrl"]} + end + end + + def work(illust_id) headers = Danbooru.config.http_headers.merge( "Referer" => "http://www.pixiv.net", "Content-Type" => "application/x-www-form-urlencoded", @@ -148,7 +169,7 @@ class PixivApiClient json = JSON.parse(body) if resp.success? - WorksResponse.new(json["response"][0]) + WorkResponse.new(json["response"][0]) elsif json["status"] == "failure" && json.dig("errors", "system", "message") =~ /対象のイラストは見つかりませんでした。/ raise BadIDError.new("Pixiv ##{illust_id} not found: work was deleted, made private, or ID is invalid.") else @@ -158,6 +179,40 @@ class PixivApiClient raise Error.new("Pixiv API call failed (status=#{resp.code} body=#{body})") end + def fanbox(fanbox_id) + url = "https://www.pixiv.net/ajax/fanbox/post?postId=#{fanbox_id.to_i}" + resp = agent.get(url) + json = JSON.parse(resp.body) + if resp.code == "200" + FanboxResponse.new(json) + elsif json["status"] == "failure" + raise Error.new("Pixiv API call failed (status=#{resp.code} body=#{body})") + end + rescue JSON::ParserError + raise Error.new("Pixiv API call failed (status=#{resp.code} body=#{body})") + end + + def novel(novel_id) + headers = Danbooru.config.http_headers.merge( + "Referer" => "http://www.pixiv.net", + "Content-Type" => "application/x-www-form-urlencoded", + "Authorization" => "Bearer #{access_token}" + ) + + url = "https://public-api.secure.pixiv.net/v#{API_VERSION}/novels/#{novel_id.to_i}.json" + resp = HTTParty.get(url, Danbooru.config.httparty_options.deep_merge(headers: headers)) + body = resp.body.force_encoding("utf-8") + json = JSON.parse(body) + + if resp.success? + NovelResponse.new(json["response"][0]) + elsif json["status"] == "failure" && json.dig("errors", "system", "message") =~ /対象のイラストは見つかりませんでした。/ + raise Error.new("Pixiv API call failed (status=#{resp.code} body=#{body})") + end + rescue JSON::ParserError + raise Error.new("Pixiv API call failed (status=#{resp.code} body=#{body})") + end + def access_token Cache.get("pixiv-papi-access-token", 3000) do access_token = nil @@ -186,4 +241,9 @@ class PixivApiClient access_token end end + + def agent + PixivWebAgent.build + end + memoize :agent end diff --git a/app/logical/sources/site.rb b/app/logical/sources/site.rb deleted file mode 100644 index 8d221da07..000000000 --- a/app/logical/sources/site.rb +++ /dev/null @@ -1,78 +0,0 @@ -# encoding: UTF-8 - -module Sources - class Site - class NoStrategyError < RuntimeError ; end - - attr_reader :strategy - delegate :url, :get, :get_size, :site_name, :artist_name, - :profile_url, :image_url, :tags, :artists, :unique_id, - :file_url, :ugoira_frame_data, :ugoira_content_type, :image_urls, - :artist_commentary_title, :artist_commentary_desc, - :dtext_artist_commentary_title, :dtext_artist_commentary_desc, - :rewrite_thumbnails, :illust_id_from_url, :translate_tag, :translated_tags, :to => :strategy - - def self.strategies - [Strategies::Pixiv, Strategies::NicoSeiga, Strategies::DeviantArt, Strategies::ArtStation, Strategies::Nijie, Strategies::Twitter, Strategies::Tumblr, Strategies::Pawoo] - end - - def initialize(url, referer_url: nil) - @url = url - - Site.strategies.each do |strategy| - if strategy.url_match?(url) || strategy.url_match?(referer_url) - @strategy = strategy.new(url, referer_url) - return - end - end - - raise NoStrategyError.new - end - - def referer_url - strategy.try(:referer_url) - end - - def normalized_for_artist_finder? - available? && strategy.normalized_for_artist_finder? - end - - def normalize_for_artist_finder! - if available? && strategy.normalizable_for_artist_finder? - strategy.normalize_for_artist_finder! - else - url - end - rescue - url - end - - def to_h - return { - :artist_name => artist_name, - :artists => artists.as_json(include: :sorted_urls), - :profile_url => profile_url, - :image_url => image_url, - :image_urls => image_urls, - :normalized_for_artist_finder_url => normalize_for_artist_finder!, - :tags => tags, - :translated_tags => translated_tags, - :unique_id => unique_id, - :artist_commentary => { - :title => artist_commentary_title, - :description => artist_commentary_desc, - :dtext_title => dtext_artist_commentary_title, - :dtext_description => dtext_artist_commentary_desc, - } - } - end - - def to_json - to_h.to_json - end - - def available? - strategy.present? - end - end -end diff --git a/app/logical/sources/strategies.rb b/app/logical/sources/strategies.rb new file mode 100644 index 000000000..aad194f3e --- /dev/null +++ b/app/logical/sources/strategies.rb @@ -0,0 +1,29 @@ +module Sources + module Strategies + def self.all + return [ + Strategies::Pixiv, + Strategies::NicoSeiga, + Strategies::Twitter, + Strategies::DeviantArt, + Strategies::Tumblr, + Strategies::ArtStation, + Strategies::Nijie, + Strategies::Pawoo, + Strategies::Moebooru, + + Strategies::Null # MUST BE LAST! + ] + end + + def self.find(url, referer=nil) + all + .detect { |strategy| strategy.match?(url, referer) } + .new(url, referer) + end + + def self.canonical(url, referer) + find(url, referer).canonical_url + end + end +end diff --git a/app/logical/sources/strategies/art_station.rb b/app/logical/sources/strategies/art_station.rb index 6ca54a5c6..075ed6b73 100644 --- a/app/logical/sources/strategies/art_station.rb +++ b/app/logical/sources/strategies/art_station.rb @@ -1,68 +1,165 @@ module Sources::Strategies class ArtStation < Base + PROJECT = %r!\Ahttps?://[a-z0-9-]+\.artstation\.com/(?:artwork|projects)/(?[a-z0-9-]+)/?\z!i + ASSET = %r!\Ahttps?://cdn\w*\.artstation\.com/p/assets/images/images/\d+/\d+/\d+/(?:medium|small|large)/!i + PROFILE1 = %r!\Ahttps?://(\w+)\.artstation\.com!i + PROFILE2 = %r!\Ahttps?://www.artstation.com/artist/(\w+)!i + PROFILE3 = %r!\Ahttps?://www.artstation.com/(\w+)!i + PROFILE = %r!#{PROFILE2}|#{PROFILE3}|#{PROFILE1}! + attr_reader :json, :image_urls - def self.url_match?(url) - self.project_id(url).present? + def self.match?(*urls) + urls.compact.any? { |x| x.match?(PROJECT) || x.match?(ASSET) || x.match?(PROFILE)} end # https://www.artstation.com/artwork/04XA4 # https://www.artstation.com/artwork/cody-from-sf # https://sa-dui.artstation.com/projects/DVERn def self.project_id(url) - if url =~ %r!\Ahttps?://[a-z0-9-]+\.artstation\.com/(?:artwork|projects)/(?[a-z0-9-]+)\z!i + if url =~ PROJECT $~[:project_id] else nil end end - def referer_url - if self.class.url_match?(@referer_url) - @referer_url - else - @url - end - end - def site_name "ArtStation" end - def project_id - self.class.project_id(referer_url) + def image_urls + image_urls_sub + .map { |asset| original_asset_url(asset) } end + memoize :image_urls def page_url - "https://www.artstation.com/artwork/#{project_id}" + [url, referer_url].each do |x| + if x =~ PROJECT + return "https://www.artstation.com/artwork/#{$~[:project_id]}" + end + end + + return super end + def profile_url + if url =~ PROFILE1 && $1 != "www" + return "https://www.artstation.com/#{$1}" + end + + if url =~ PROFILE2 + return "https://www.artstation.com/#{$1}" + end + + if url =~ PROFILE3 && url !~ PROJECT + return url + end + + api_json["user"]["permalink"] + end + + def artist_name + api_json["user"]["username"] + end + + def artist_commentary_title + api_json["title"] + end + + def artist_commentary_desc + ActionView::Base.full_sanitizer.sanitize(api_json["description"]) + end + memoize :artist_commentary_desc + + def tags + return nil if !api_json.has_key?("tags") + + api_json["tags"]. + map { |tag| [tag.downcase.tr(" ", "_"), tag_url(tag)]} + end + memoize :tags + + def normalized_for_artist_finder? + url =~ PROFILE3 && url !~ PROFILE2 && url !~ PROJECT + end + + def normalizable_for_artist_finder? + url =~ PROFILE || url =~ PROJECT + end + + def normalize_for_artist_finder + profile_url + end + + public + + def image_urls_sub + if url.match?(ASSET) + return [url] + end + + api_json["assets"] + .select { |asset| asset["asset_type"] == "image" } + .map { |asset| asset["image_url"] } + end + + # these are de facto private methods but are public for testing + # purposes + + def project_id + self.class.project_id(url) || self.class.project_id(referer_url) + end + memoize :project_id + def api_url "https://www.artstation.com/projects/#{project_id}.json" end - def image_url - image_urls.first + def api_json + if project_id.nil? + raise ::Sources::Error.new("Project id could not be determined from (#{url}, #{referer_url})") + end + + resp = HTTParty.get(api_url, Danbooru.config.httparty_options) + + if resp.success? + json = JSON.parse(resp.body) + else + raise HTTParty::ResponseError.new(resp) + end + + return json + end + memoize :api_json + + # Returns the original representation of the asset, if it exists. Otherwise + # return the url. + def original_asset_url(x) + if x =~ ASSET + # example: https://cdnb3.artstation.com/p/assets/images/images/003/716/071/large/aoi-ogata-hate-city.jpg?1476754974 + original_url = x.sub(%r!/(?:medium|small|large)/!, "/original/") + + if http_exists?(original_url, headers) + return original_url + end + + if x =~ /medium|small/ + large_url = x.sub(%r!/(?:medium|small)/!, "/large/") + + if http_exists?(large_url, headers) + return large_url + end + end + end + + return x end - def get - resp = HTTParty.get(api_url, Danbooru.config.httparty_options) - image_url_rewriter = Downloads::RewriteStrategies::ArtStation.new - if resp.success? - @json = JSON.parse(resp.body) - @artist_name = json["user"]["username"] - @profile_url = json["user"]["permalink"] - images = json["assets"].select { |asset| asset["asset_type"] == "image" } - @image_urls = images.map do |x| - y, _, _ = image_url_rewriter.rewrite(x["image_url"], nil) - y - end - @tags = json["tags"].map {|x| [x.downcase.tr(" ", "_"), "https://www.artstation.com/search?q=" + CGI.escape(x)]} if json["tags"] - @artist_commentary_title = json["title"] - @artist_commentary_desc = ActionView::Base.full_sanitizer.sanitize(json["description"]) - else - raise "HTTP error code: #{resp.code} #{resp.message}" - end + def tag_url(name) + "https://www.artstation.com/search?q=" + CGI.escape(name) end + end end diff --git a/app/logical/sources/strategies/base.rb b/app/logical/sources/strategies/base.rb index 2f576f667..2ca5c4796 100644 --- a/app/logical/sources/strategies/base.rb +++ b/app/logical/sources/strategies/base.rb @@ -1,41 +1,108 @@ # This is a collection of strategies for extracting information about a # resource. At a minimum it tries to extract the artist name and a canonical # URL to download the image from. But it can also be used to normalize a URL -# for use with the artist finder. It differs from Downloads::RewriteStrategies -# in that the latter is more for normalizing and rewriting a URL until it is -# suitable for downloading, whereas Sources::Strategies is more for meta-data -# that can only be obtained by downloading and parsing the resource. +# for use with the artist finder. +# +# Design Principles +# +# In general you should minimize state. You can safely assume that url +# and referer_url will not change over the lifetime of an instance, +# so you can safely memoize methods and their results. A common pattern is +# conditionally making an external API call and parsing its response. You should +# make this call on demand and memoize the response. module Sources module Strategies class Base attr_reader :url, :referer_url - attr_reader :artist_name, :profile_url, :image_url, :tags - attr_reader :artist_commentary_title, :artist_commentary_desc - def self.url_match?(url) + extend Memoist + + def self.match?(*urls) false end + # * url - Should point to a resource suitable for + # downloading. This may sometimes point to the binary file. + # It may also point to the artist's profile page, in cases + # where this class is being used to normalize artist urls. + # Implementations should be smart enough to detect this and + # behave accordingly. + # * referer_url - Sometimes the HTML page cannot be + # determined from url. You should generally pass in a + # referrer_url so the strategy can discover the HTML + # page and other information. def initialize(url, referer_url = nil) @url = url @referer_url = referer_url end - # No remote calls are made until this method is called. - def get + def site_name raise NotImplementedError end - def get_size - @get_size ||= Downloads::File.new(@image_url).size + # Whatever url is, this method should return the direct links + # to the canonical binary files. It should not be an HTML page. It should + # be a list of JPEG, PNG, GIF, WEBM, MP4, ZIP, etc. It is what the + # downloader will fetch and save to disk. + def image_urls + raise NotImplementedError end + def image_url + image_urls.first + end + + # Whatever url is, this method should return a link to the HTML + # page containing the resource. It should not be a binary file. It will + # eventually be assigned as the source for the post, but it does not + # represent what the downloader will fetch. + def page_url + Rails.logger.warn "Valid page url for (#{url}, #{referer_url}) not found" + + return nil + end + + # This will be the url stored in posts. Typically this is the page + # url, but on some sites it may be preferable to store the image url. + def canonical_url + page_url + end + + # A link to the artist's profile page on the site. + def profile_url + nil + end + + def artist_name + raise NotImplementedError + end + + def artist_commentary_title + nil + end + + def artist_commentary_desc + nil + end + + # Subclasses should merge in any required headers needed to access resources + # on the site. + def headers + return Danbooru.config.http_headers + end + + # Returns the size of the image resource without actually downloading the file. + def size + Downloads::File.new(image_url).size + end + memoize :size + # Subclasses should return true only if the URL is in its final normalized form. # - # Sources::Site.new("http://img.pixiv.net/img/evazion").normalized_for_artist_finder? + # Sources::Strategies.find("http://img.pixiv.net/img/evazion").normalized_for_artist_finder? # => true - # Sources::Site.new("http://i2.pixiv.net/img18/img/evazion/14901720_m.png").normalized_for_artist_finder? + # Sources::Strategies.find("http://i2.pixiv.net/img18/img/evazion/14901720_m.png").normalized_for_artist_finder? # => false def normalized_for_artist_finder? false @@ -44,32 +111,33 @@ module Sources # Subclasses should return true only if the URL is a valid URL that could # be converted into normalized form. # - # Sources::Site.new("http://www.pixiv.net/member_illust.php?mode=medium&illust_id=18557054").normalizable_for_artist_finder? + # Sources::Strategies.find("http://www.pixiv.net/member_illust.php?mode=medium&illust_id=18557054").normalizable_for_artist_finder? # => true - # Sources::Site.new("http://dic.pixiv.net/a/THUNDERproject").normalizable_for_artist_finder? + # Sources::Strategies.find("http://dic.pixiv.net/a/THUNDERproject").normalizable_for_artist_finder? # => false def normalizable_for_artist_finder? false end - def normalize_for_artist_finder! - url - end - - def site_name - raise NotImplementedError + def normalize_for_artist_finder + profile_url || url end + # A unique identifier for the artist. This is used for artist creation. def unique_id artist_name end def artists - Artist.find_artists(url, referer_url) + Artist.find_artists(profile_url) end - def image_urls - [image_url] + def file_url + image_url + end + + def data + {} end def tags @@ -97,11 +165,6 @@ module Sources translated_tags end - # Should be set to a url for sites that prevent hotlinking, or left nil for sites that don't. - def fake_referer - nil - end - def dtext_artist_commentary_title self.class.to_dtext(artist_commentary_title) end @@ -110,9 +173,40 @@ module Sources self.class.to_dtext(artist_commentary_desc) end + # A strategy may return extra data unrelated to the file + def data + return {} + end + + def to_h + return { + :artist_name => artist_name, + :artists => artists.as_json(include: :sorted_urls), + :profile_url => profile_url, + :image_url => image_url, + :image_urls => image_urls, + :normalized_for_artist_finder_url => normalize_for_artist_finder, + :tags => tags, + :translated_tags => translated_tags, + :unique_id => unique_id, + :artist_commentary => { + :title => artist_commentary_title, + :description => artist_commentary_desc, + :dtext_title => dtext_artist_commentary_title, + :dtext_description => dtext_artist_commentary_desc, + } + } + end + + def to_json + to_h.to_json + end + protected - def agent - raise NotImplementedError + + def http_exists?(url, headers) + res = HTTParty.head(url, Danbooru.config.httparty_options.deep_merge(headers: headers)) + res.success? end # Convert commentary to dtext by stripping html tags. Sites can override diff --git a/app/logical/sources/strategies/deviant_art.rb b/app/logical/sources/strategies/deviant_art.rb index 1e7766d22..a06ecad8e 100644 --- a/app/logical/sources/strategies/deviant_art.rb +++ b/app/logical/sources/strategies/deviant_art.rb @@ -1,44 +1,127 @@ module Sources module Strategies class DeviantArt < Base - extend Memoist + ATTRIBUTED_ASSET = %r{\Ahttps?://(?:fc|th|pre|orig|img)\d{2}\.deviantart\.net/.+/[a-z0-9_]*_by_[a-z0-9_]+-d([a-z0-9]+)\.}i + ASSET = %r{\Ahttps?://(?:fc|th|pre|orig|img)\d{2}\.deviantart\.net/.+/[a-f0-9]+-d([a-z0-9]+)\.}i + PATH_ART = %r{\Ahttps?://www\.deviantart\.com/([^/]+)/art/} + RESERVED_SUBDOMAINS = %r{\Ahttps?://(?:fc|th|pre|orig|img|www)\.} + SUBDOMAIN_ART = %r{\Ahttps?://(.+?)\.deviantart\.com(.*)} + PROFILE = %r{\Ahttps?://www\.deviantart\.com/([^/]+)/?\z} - def self.url_match?(url) - url =~ /^https?:\/\/(?:.+?\.)?deviantart\.(?:com|net)/ - end - - def self.normalize(url) - if url =~ %r{\Ahttps?://(?:fc|th|pre|orig|img)\d{2}\.deviantart\.net/.+/[a-z0-9_]*_by_[a-z0-9_]+-d([a-z0-9]+)\.}i - "http://fav.me/d#{$1}" - elsif url =~ %r{\Ahttps?://(?:fc|th|pre|orig|img)\d{2}\.deviantart\.net/.+/[a-f0-9]+-d([a-z0-9]+)\.}i - "http://fav.me/d#{$1}" - elsif url =~ %r{\Ahttps?://www\.deviantart\.com/([^/]+)/art/} - url - elsif url !~ %r{\Ahttps?://(?:fc|th|pre|orig|img|www)\.} && url =~ %r{\Ahttps?://(.+?)\.deviantart\.com(.*)} - "http://www.deviantart.com/#{$1}#{$2}" - else - url - end - end - - def referer_url - if @referer_url =~ /deviantart\.com\/art\// && @url =~ /https?:\/\/(?:fc|th|pre|orig|img)\d{2}\.deviantart\.net\// - @referer_url - else - @url - end + def self.match?(*urls) + urls.compact.any? { |x| x.match?(/^https?:\/\/(?:.+?\.)?deviantart\.(?:com|net)/) } end def site_name "Deviant Art" end - def unique_id - artist_name + def image_urls + # normalize thumbnails + if url =~ %r{^(https?://(?:fc|th)\d{2}\.deviantart\.net/.+?/)200H/} + match = $1 + return [url.sub(match + "200H/", match)] + end + + if url =~ %r{^(https?://(?:fc|th)\d{2}\.deviantart\.net/.+?/)PRE/} + match = $1 + return [url.sub(match + "PRE/", match)] + end + + # return direct links + if url =~ ATTRIBUTED_ASSET || url =~ ASSET + return [url] + end + + # work is deleted, use image url as given by user. + if uuid.nil? + return [url] + end + + # work is downloadable + if api_deviation[:is_downloadable] && api_deviation[:download_filesize] != api_deviation.dig(:content, :filesize) + src = api_download[:src] + src.gsub!(%r!\Ahttps?://s3\.amazonaws\.com/!i, "https://") + src.gsub!(/\?.*\z/, "") # strip s3 query params + src.gsub!(%r!\Ahttps://origin-orig\.deviantart\.net!, "http://origin-orig.deviantart.net") # https://origin-orig.devianart.net doesn't work + + return [src] + end + + # work isn't downloadable, or download size is same as regular size. + if api_deviation.present? + return [api_deviation.dig(:content, :src)] + end + + raise "Couldn't find image url" end - def get - # no-op + def page_url + [url, referer_url].each do |x| + if x =~ ATTRIBUTED_ASSET + return "http://fav.me/d#{$1}" + end + + if x =~ ASSET + return "http://fav.me/d#{$1}" + end + + if x =~ PATH_ART + return x + end + + if x !~ RESERVED_SUBDOMAINS && x =~ SUBDOMAIN_ART + return "http://www.deviantart.com/#{$1}#{$2}" + end + end + + return super + end + + def profile_url + if url =~ PROFILE + return url + end + + if artist_name.blank? + return nil + end + + return "https://www.deviantart.com/#{artist_name}" + end + + def artist_name + api_metadata.dig(:author, :username).try(&:downcase) + end + + def artist_commentary_title + api_metadata[:title] + end + + def artist_commentary_desc + api_metadata[:description] + end + + def normalized_for_artist_finder? + url =~ PROFILE + end + + def normalizable_for_artist_finder? + url =~ PATH_ART || url =~ SUBDOMAIN_ART + end + + def normalize_for_artist_finder + profile_url + end + + def tags + if api_metadata.blank? + return [] + end + + api_metadata[:tags].map do |tag| + [tag[:tag_name], "https://www.deviantart.com/tag/#{tag[:tag_name]}"] + end end def dtext_artist_commentary_desc @@ -71,75 +154,24 @@ module Sources end.gsub(/\A[[:space:]]+|[[:space:]]+\z/, "") end - def artist_name - api_metadata.dig(:author, :username).try(&:downcase) - end - - def profile_url - return "" if artist_name.blank? - "https://www.deviantart.com/#{artist_name}" - end - - def image_url - # work is deleted, use image url as given by user. - if uuid.nil? - url - # work is downloadable - elsif api_deviation[:is_downloadable] && api_deviation[:download_filesize] != api_deviation.dig(:content, :filesize) - src = api_download[:src] - src.gsub!(%r!\Ahttps?://s3\.amazonaws\.com/!i, "https://") - src.gsub!(/\?.*\z/, "") # strip s3 query params - src.gsub!(%r!\Ahttps://origin-orig\.deviantart\.net!, "http://origin-orig.deviantart.net") # https://origin-orig.devianart.net doesn't work - - src - # work isn't downloadable, or download size is same as regular size. - elsif api_deviation.present? - api_deviation.dig(:content, :src) - else - raise "couldn't find image url" - end - end - - def tags - return [] if api_metadata.blank? - - api_metadata[:tags].map do |tag| - [tag[:tag_name], "https://www.deviantart.com/tag/#{tag[:tag_name]}"] - end - end - - def artist_commentary_title - api_metadata[:title] - end - - def artist_commentary_desc - api_metadata[:description] - end - - def normalizable_for_artist_finder? - url !~ %r!^https?://www.deviantart.com/! - end - - def normalized_for_artist_finder? - url =~ %r!^https?://www.deviantart.com/! - end - - def normalize_for_artist_finder! - profile_url - end - - protected - - def normalized_url - @normalized_url ||= self.class.normalize(url) - end + public def page - options = Danbooru.config.httparty_options.deep_merge(format: :plain, headers: { "Accept-Encoding" => "gzip" }) - resp = HTTParty.get(normalized_url, **options) - body = Zlib.gunzip(resp.body) + options = Danbooru.config.httparty_options.deep_merge( + format: :plain, + headers: { "Accept-Encoding" => "gzip" } + ) + resp = HTTParty.get(page_url, **options) + + if resp.success? + body = Zlib.gunzip(resp.body) + else + raise HTTParty::ResponseError.new(resp) + end + Nokogiri::HTML(body) end + memoize :page # Scrape UUID from # For private works the UUID will be nil. @@ -151,29 +183,39 @@ module Sources uuid = appurl[%r!\ADeviantArt://deviation/(.*)\z!, 1] uuid end + memoize :uuid def api_client - api_client = DeviantArtApiClient.new(Danbooru.config.deviantart_client_id, Danbooru.config.deviantart_client_secret, Danbooru.config.httparty_options) - api_client.access_token = Cache.get("da-access-token", 55.minutes) { api_client.access_token.to_hash } + api_client = DeviantArtApiClient.new( + Danbooru.config.deviantart_client_id, + Danbooru.config.deviantart_client_secret, + Danbooru.config.httparty_options + ) + api_client.access_token = Cache.get("da-access-token", 55.minutes) do + api_client.access_token.to_hash + end api_client end + memoize :api_client def api_deviation return {} if uuid.nil? api_client.deviation(uuid) end + memoize :api_deviation def api_metadata return {} if uuid.nil? api_client.metadata(uuid)[:metadata].first end + memoize :api_metadata def api_download return {} if uuid.nil? api_client.download(uuid) end + memoize :api_download - memoize :page, :uuid, :api_client, :api_deviation, :api_metadata, :api_download end end end diff --git a/app/logical/sources/strategies/moebooru.rb b/app/logical/sources/strategies/moebooru.rb new file mode 100644 index 000000000..630199a3a --- /dev/null +++ b/app/logical/sources/strategies/moebooru.rb @@ -0,0 +1,35 @@ +module Sources + module Strategies + class Moebooru < Base + DOMAINS = /(?:[^.]+\.)?yande\.re|konachan\.com/ + + def self.match?(*urls) + urls.compact.any? { |x| x.match?(DOMAINS) } + end + + def site_name + URI.parse(url).host + end + + def image_url + if url =~ %r{\A(https?://(?:#{DOMAINS}))/jpeg/([a-f0-9]+(?:/.*)?)\.jpg\Z} + return $1 + "/image/" + $2 + ".png" + end + + return url + end + + def page_url + return url + end + + def profile_url + return url + end + + def artist_name + return "" + end + end + end +end diff --git a/app/logical/sources/strategies/nico_seiga.rb b/app/logical/sources/strategies/nico_seiga.rb index 3b18306b3..13f957cb0 100644 --- a/app/logical/sources/strategies/nico_seiga.rb +++ b/app/logical/sources/strategies/nico_seiga.rb @@ -1,182 +1,188 @@ module Sources module Strategies class NicoSeiga < Base - extend Memoist - - def self.url_match?(url) - url =~ /^https?:\/\/(?:\w+\.)?nico(?:seiga|video)\.jp/ - end + URL = %r!\Ahttps?://(?:\w+\.)?nico(?:seiga|video)\.jp! + DIRECT = %r!\Ahttps?://lohas\.nicoseiga\.jp/priv/[0-9a-f]+! + PAGE = %r!\Ahttps?://seiga\.nicovideo\.jp/seiga/im(\d+)!i + PROFILE = %r!\Ahttps?://seiga\.nicovideo\.jp/user/illust/(\d+)!i - def referer_url - if @referer_url =~ /seiga\.nicovideo\.jp\/seiga\/im\d+/ && @url =~ /http:\/\/lohas\.nicoseiga\.jp\/(?:priv|o)\// - @referer_url - else - @url - end + def self.match?(*urls) + urls.compact.any? { |x| x.match?(URL) } end def site_name "Nico Seiga" end - def unique_id - profile_url =~ /\/illust\/(\d+)/ - "nicoseiga" + $1 - end - - def get - page = load_page - - @artist_name, @profile_url = get_profile_from_api - @image_url = get_image_url_from_page(page) - @artist_commentary_title, @artist_commentary_desc = get_artist_commentary_from_api - - # Log out before getting the tags. - # The reason for this is that if you're logged in and viewing a non-adult-rated work, the tags will be added with javascript after the page has loaded meaning we can't extract them easily. - # This does not apply if you're logged out (or if you're viewing an adult-rated work). - agent.cookie_jar.clear! - agent.get(normalized_url) do |page| - @tags = get_tags_from_page(page) - end - end - - def normalized_for_artist_finder? - url =~ %r!https?://seiga\.nicovideo\.jp/user/illust/\d+/!i - end - - def normalizable_for_artist_finder? - url =~ %r!https?://seiga\.nicovideo\.jp/seiga/im\d+!i - end - - def normalize_for_artist_finder! - page = load_page - @illust_id = get_illust_id_from_url - @artist_name, @profile_url = get_profile_from_api - @profile_url + "/" - end - - protected - - def api_client - NicoSeigaApiClient.new(get_illust_id_from_url) - end - - def get_illust_id_from_url - if normalized_url =~ %r!http://seiga.nicovideo.jp/seiga/im(\d+)! - $1.to_i - else - nil - end - end - - def load_page - page = agent.get(normalized_url) - - if page.search("a#link_btn_login").any? - # Session cache is invalid, clear it and log in normally. - Cache.delete("nico-seiga-session") - @agent = nil - page = agent.get(normalized_url) + def image_urls + if url =~ DIRECT + return [url] end - page - end - - def get_profile_from_api - return [api_client.moniker, "http://seiga.nicovideo.jp/user/illust/#{api_client.user_id}"] - end - - def get_image_url_from_page(page) link = page.search("a#illust_link") if link.any? image_url = "http://seiga.nicovideo.jp" + link[0]["href"] page = agent.get(image_url) # need to follow this redirect while logged in or it won't work + if page.is_a?(Mechanize::Image) - return page.uri.to_s + return [page.uri.to_s] end + images = page.search("div.illust_view_big").select {|x| x["data-src"] =~ /\/priv\//} + if images.any? - image_url = "http://lohas.nicoseiga.jp" + images[0]["data-src"] - end - else - image_url = nil - end - - return image_url - end - - def get_tags_from_page(page) - links = page.search("a.tag") - - links.map do |node| - [node.text, "http://seiga.nicovideo.jp" + node.attr("href")] - end - end - - def get_artist_commentary_from_api - [api_client.title, api_client.desc] - end - - def normalized_url - @normalized_url ||= begin - if url =~ %r!\Ahttps?://lohas\.nicoseiga\.jp/o/[a-f0-9]+/\d+/(\d+)! - "http://seiga.nicovideo.jp/seiga/im#{$1}" - elsif url =~ %r{\Ahttps?://lohas\.nicoseiga\.jp/priv/(\d+)\?e=\d+&h=[a-f0-9]+}i - "http://seiga.nicovideo.jp/seiga/im#{$1}" - elsif url =~ %r{\Ahttps?://lohas\.nicoseiga\.jp/priv/[a-f0-9]+/\d+/(\d+)}i - "http://seiga.nicovideo.jp/seiga/im#{$1}" - elsif url =~ %r{\Ahttps?://lohas\.nicoseiga\.jp/priv/(\d+)}i - "http://seiga.nicovideo.jp/seiga/im#{$1}" - elsif url =~ %r{\Ahttps?://lohas\.nicoseiga\.jp//?thumb/(\d+)i?}i - "http://seiga.nicovideo.jp/seiga/im#{$1}" - elsif url =~ %r{/seiga/im\d+} - url - else - nil + return ["http://lohas.nicoseiga.jp" + images[0]["data-src"]] end end + + raise "image url not found for (#{url}, #{referer_url})" end + def page_url + [url, referer_url].each do |x| + if x =~ %r!\Ahttps?://lohas\.nicoseiga\.jp/o/[a-f0-9]+/\d+/(\d+)! + return "http://seiga.nicovideo.jp/seiga/im#{$1}" + end + + if x =~ %r{\Ahttps?://lohas\.nicoseiga\.jp/priv/(\d+)\?e=\d+&h=[a-f0-9]+}i + return "http://seiga.nicovideo.jp/seiga/im#{$1}" + end + + if x =~ %r{\Ahttps?://lohas\.nicoseiga\.jp/priv/[a-f0-9]+/\d+/(\d+)}i + return "http://seiga.nicovideo.jp/seiga/im#{$1}" + end + + if x =~ %r{\Ahttps?://lohas\.nicoseiga\.jp/priv/(\d+)}i + return "http://seiga.nicovideo.jp/seiga/im#{$1}" + end + + if x =~ %r{\Ahttps?://lohas\.nicoseiga\.jp//?thumb/(\d+)i?}i + return "http://seiga.nicovideo.jp/seiga/im#{$1}" + end + + if x =~ %r{/seiga/im\d+} + return x + end + end + + return super + end + + def profile_url + if url =~ PROFILE + return url + end + + "http://seiga.nicovideo.jp/user/illust/#{api_client.user_id}" + end + + def artist_name + api_client.moniker + end + + def artist_commentary_title + api_client.title + end + + def artist_commentary_desc + api_client.desc + end + + def headers + super.merge( + "Referer" => "https://seiga.nicovideo.jp" + ) + end + + def normalized_for_artist_finder? + url =~ PROFILE + end + + def normalizable_for_artist_finder? + url =~ PAGE || url =~ PROFILE + end + + def normalize_for_artist_finder + "#{profile_url}/" + end + + def unique_id + "nicoseiga#{api_client.user_id}" + end + + def tags + string = page.at("meta[name=keywords]").try(:[], "content") || "" + string.split(/,/).map do |name| + [name, "https://seiga.nicovideo.jp/tag/#{CGI.escape(name)}"] + end + end + memoize :tags + + public + + def api_client + NicoSeigaApiClient.new(illust_id) + end + memoize :api_client + + def illust_id + if page_url =~ PAGE + return $1.to_i + end + + return nil + end + + def page + doc = agent.get(page_url) + + if doc.search("a#link_btn_login").any? + # Session cache is invalid, clear it and log in normally. + Cache.delete("nico-seiga-session") + doc = agent.get(page_url) + end + + doc + end + memoize :page + def agent - @agent ||= begin - mech = Mechanize.new - mech.redirect_ok = false - mech.keep_alive = false + mech = Mechanize.new + mech.redirect_ok = false + mech.keep_alive = false - session = Cache.get("nico-seiga-session") - if session - cookie = Mechanize::Cookie.new("user_session", session) - cookie.domain = ".nicovideo.jp" - cookie.path = "/" - mech.cookie_jar.add(cookie) - else - mech.get("https://account.nicovideo.jp/login") do |page| - page.form_with(:id => "login_form") do |form| - form["mail_tel"] = Danbooru.config.nico_seiga_login - form["password"] = Danbooru.config.nico_seiga_password - end.click_button - end - session = mech.cookie_jar.cookies.select{|c| c.name == "user_session"}.first - if session - Cache.put("nico-seiga-session", session.value, 1.month) - else - raise "Session not found" - end - end - - # This cookie needs to be set to allow viewing of adult works - cookie = Mechanize::Cookie.new("skip_fetish_warning", "1") - cookie.domain = "seiga.nicovideo.jp" + session = Cache.get("nico-seiga-session") + if session + cookie = Mechanize::Cookie.new("user_session", session) + cookie.domain = ".nicovideo.jp" cookie.path = "/" mech.cookie_jar.add(cookie) - - mech.redirect_ok = true - mech + else + mech.get("https://account.nicovideo.jp/login") do |page| + page.form_with(:id => "login_form") do |form| + form["mail_tel"] = Danbooru.config.nico_seiga_login + form["password"] = Danbooru.config.nico_seiga_password + end.click_button + end + session = mech.cookie_jar.cookies.select{|c| c.name == "user_session"}.first + if session + Cache.put("nico-seiga-session", session.value, 1.month) + else + raise "Session not found" + end end - end - memoize :api_client + # This cookie needs to be set to allow viewing of adult works + cookie = Mechanize::Cookie.new("skip_fetish_warning", "1") + cookie.domain = "seiga.nicovideo.jp" + cookie.path = "/" + mech.cookie_jar.add(cookie) + + mech.redirect_ok = true + mech + end + memoize :agent end end end diff --git a/app/logical/sources/strategies/nijie.rb b/app/logical/sources/strategies/nijie.rb index 53e32d21e..81879d3bf 100644 --- a/app/logical/sources/strategies/nijie.rb +++ b/app/logical/sources/strategies/nijie.rb @@ -1,155 +1,158 @@ module Sources module Strategies class Nijie < Base - attr_reader :image_urls + PICTURE = %r{pic\d+\.nijie.info/nijie_picture/} + PAGE = %r{\Ahttps?://nijie\.info/view\.php.+id=\d+} + DIFF = %r!\Ahttps?://pic\d+\.nijie\.info/__rs_l120x120/nijie_picture/diff/main/[0-9_]+\.\w+\z!i - def self.url_match?(url) - url =~ /^https?:\/\/(?:.+?\.)?nijie\.info/ - end - - def initialize(url, referer_url=nil) - super(normalize_url(url), normalize_url(referer_url)) - end - - def referer_url - if @referer_url =~ /nijie\.info\/view\.php.+id=\d+/ && @url =~ /pic\d+\.nijie.info\/nijie_picture\// - @referer_url - else - @url - end + def self.match?(*urls) + urls.compact.any? { |x| x.match?(/^https?:\/\/(?:.+?\.)?nijie\.info/) } end def site_name "Nijie" end + def image_urls + if url =~ PICTURE + return [url] + end + + # http://pic03.nijie.info/__rs_l120x120/nijie_picture/diff/main/218856_0_236014_20170620101329.png + # => http://pic03.nijie.info/nijie_picture/diff/main/218856_3_236014_20170620101331.png + if url =~ DIFF + return [normalize_thumbnails(url)] + end + + page.search("div#gallery a > img").map do |img| + # //pic01.nijie.info/__rs_l120x120/nijie_picture/diff/main/218856_0_236014_20170620101329.png + # => https://pic01.nijie.info/__rs_l120x120/nijie_picture/diff/main/218856_0_236014_20170620101329.png + normalize_thumbnails("https:" + img.attr("src")) + end.uniq + end + + def page_url + [url, referer_url].each do |x| + if x =~ PAGE + return x + end + + if x =~ %r!https?://nijie\.info/view_popup\.php.+id=(\d+)! + return "https://nijie.info/view.php?id=#{$1}" + end + end + + return super + end + + def profile_url + links = page.search("a.name") + + if links.any? + return "https://nijie.info/" + links[0]["href"] + end + + return nil + end + + def artist_name + links = page.search("a.name") + + if links.any? + return links[0].text + end + + return nil + end + + def artist_commentary_title + page.search("h2.illust_title").text + end + + def artist_commentary_desc + page.search('meta[property="og:description"]').attr("content").value + end + + def tags + links = page.search("div#view-tag a").find_all do |node| + node["href"] =~ /search\.php/ + end + + if links.any? + return links.map do |node| + [node.inner_text, "https://nijie.info" + node.attr("href")] + end + end + + return [] + end + def unique_id profile_url =~ /nijie\.info\/members.php\?id=(\d+)/ "nijie" + $1.to_s end - def image_url - image_urls.first - end - - def get - page = agent.get(referer_url) - - if page.search("div#header-login-container").any? - # Session cache is invalid, clear it and log in normally. - Cache.delete("nijie-session") - @agent = nil - page = agent.get(referer_url) - end - - @artist_name, @profile_url = get_profile_from_page(page) - @image_urls = get_image_urls_from_page(page) - @tags = get_tags_from_page(page) - @artist_commentary_title, @artist_commentary_desc = get_commentary_from_page(page) - end - - protected + public def self.to_dtext(text) text = text.gsub(/\r\n|\r/, "
") DText.from_html(text).strip end - def get_commentary_from_page(page) - title = page.search("h2.illust_title").text - desc = page.search('meta[property="og:description"]').attr("content").value - - [title, desc] + def normalize_thumbnails(x) + x.gsub(%r!__rs_l120x120/!i, "") end - def get_profile_from_page(page) - links = page.search("a.name") + def page + doc = agent.get(page_url) - if links.any? - profile_url = "http://nijie.info/" + links[0]["href"] - artist_name = links[0].text - else - profile_url = nil - artist_name = nil + if doc.search("div#header-login-container").any? + # Session cache is invalid, clear it and log in normally. + Cache.delete("nijie-session") + doc = agent.get(page_url) end - return [artist_name, profile_url].compact - end - - def get_image_urls_from_page(page) - page.search("div#gallery a > img").map do |img| - # //pic01.nijie.info/__rs_l120x120/nijie_picture/diff/main/218856_0_236014_20170620101329.png - # => https://pic01.nijie.info/__rs_l120x120/nijie_picture/diff/main/218856_0_236014_20170620101329.png - url = "https:" + img.attr("src") - normalize_image_url(url) - end - end - - def get_tags_from_page(page) - # puts page.root.to_xhtml - - links = page.search("div#view-tag a").find_all do |node| - node["href"] =~ /search\.php/ - end - - if links.any? - links.map do |node| - [node.inner_text, "http://nijie.info" + node.attr("href")] - end - else - [] - end - end - - def normalize_url(url) - if url =~ %r!https?://nijie\.info/view_popup\.php.+id=(\d+)! - return "http://nijie.info/view.php?id=#{$1}" - else - return url - end - end - - def normalize_image_url(image_url) - # http://pic03.nijie.info/__rs_l120x120/nijie_picture/diff/main/218856_0_236014_20170620101329.png - # => http://pic03.nijie.info/nijie_picture/diff/main/218856_3_236014_20170620101331.png - if image_url =~ %r!\Ahttps?://pic\d+\.nijie\.info/__rs_l120x120/nijie_picture/diff/main/[0-9_]+\.\w+\z!i - image_url = image_url.gsub(%r!__rs_l120x120/!i, "") - end - - image_url = image_url.gsub(%r!\Ahttp:!i, "https:") - image_url + return doc end + memoize :page def agent - @agent ||= begin - mech = Mechanize.new + mech = Mechanize.new - session = Cache.get("nijie-session") - if session - cookie = Mechanize::Cookie.new("NIJIEIJIEID", session) - cookie.domain = ".nijie.info" - cookie.path = "/" - mech.cookie_jar.add(cookie) - else - mech.get("http://nijie.info/login.php") do |page| - page.form_with(:action => "/login_int.php") do |form| - form['email'] = Danbooru.config.nijie_login - form['password'] = Danbooru.config.nijie_password - end.click_button - end - session = mech.cookie_jar.cookies.select{|c| c.name == "NIJIEIJIEID"}.first - Cache.put("nijie-session", session.value, 1.month) if session - end - - # This cookie needs to be set to allow viewing of adult works while anonymous - cookie = Mechanize::Cookie.new("R18", "1") + session = Cache.get("nijie-session") + if session + cookie = Mechanize::Cookie.new("NIJIEIJIEID", session) cookie.domain = ".nijie.info" cookie.path = "/" mech.cookie_jar.add(cookie) + else + mech.get("https://nijie.info/login.php") do |page| + page.form_with(:action => "/login_int.php") do |form| + form['email'] = Danbooru.config.nijie_login + form['password'] = Danbooru.config.nijie_password + end.click_button + end + session = mech.cookie_jar.cookies.select{|c| c.name == "NIJIEIJIEID"}.first + Cache.put("nijie-session", session.value, 1.day) if session + end - mech + # This cookie needs to be set to allow viewing of adult works while anonymous + cookie = Mechanize::Cookie.new("R18", "1") + cookie.domain = ".nijie.info" + cookie.path = "/" + mech.cookie_jar.add(cookie) + + mech + + rescue Mechanize::ResponseCodeError => x + if x.response_code.to_i == 429 + sleep(5) + retry + else + raise end end + memoize :agent end end end diff --git a/app/logical/sources/strategies/null.rb b/app/logical/sources/strategies/null.rb new file mode 100644 index 000000000..9093e01b9 --- /dev/null +++ b/app/logical/sources/strategies/null.rb @@ -0,0 +1,43 @@ +module Sources + module Strategies + class Null < Base + def self.match?(*urls) + true + end + + def image_urls + [url] + end + + def page_url + url + end + + def normalized_for_artist_finder? + true + end + + def normalizable_for_artist_finder? + false + end + + def normalize_for_artist_finder + url + end + + def site_name + URI.parse(url).hostname || "N/A" + rescue + "N/A" + end + + def unique_id + url + end + + def rewrite(url, headers, data) + return [url, headers, data] + end + end + end +end diff --git a/app/logical/sources/strategies/pawoo.rb b/app/logical/sources/strategies/pawoo.rb index bad6d385b..666242ec5 100644 --- a/app/logical/sources/strategies/pawoo.rb +++ b/app/logical/sources/strategies/pawoo.rb @@ -1,62 +1,80 @@ -# html page urls: -# https://pawoo.net/@evazion/19451018 -# https://pawoo.net/web/statuses/19451018 -# -# image urls: -# https://img.pawoo.net/media_attachments/files/001/297/997/small/c4272a09570757c2.png -# https://img.pawoo.net/media_attachments/files/001/297/997/original/c4272a09570757c2.png -# https://pawoo.net/media/lU2uV7C1MMQSb1czwvg -# -# artist urls: -# https://pawoo.net/@evazion -# https://pawoo.net/web/accounts/47806 - module Sources::Strategies class Pawoo < Base - attr_reader :image_urls + IMAGE = %r!\Ahttps?://img\.pawoo\.net/media_attachments/files/(\d+/\d+/\d+)! - def self.url_match?(url) - PawooApiClient::Status.is_match?(url) || PawooApiClient::Account.is_match?(url) - end - - def referer_url - normalized_url + def self.match?(*urls) + urls.compact.any? do |x| + x =~ IMAGE || PawooApiClient::Status.is_match?(x) || PawooApiClient::Account.is_match?(x) + end end def site_name "Pawoo" end - def api_response - @response ||= PawooApiClient.new.get(normalized_url) + def image_url + image_urls.first end - def get - response = api_response - @artist_name = response.account_name - @profile_url = response.profile_url - @image_url = response.image_urls.first - @image_urls = response.image_urls - @tags = response.tags - @artist_commentary_title = nil - @artist_commentary_desc = response.commentary - end - - def normalized_url - if self.class.url_match?(@url) - @url - elsif self.class.url_match?(@referer_url) - @referer_url + # https://img.pawoo.net/media_attachments/files/001/297/997/small/c4272a09570757c2.png + # https://img.pawoo.net/media_attachments/files/001/297/997/original/c4272a09570757c2.png + # https://pawoo.net/media/lU2uV7C1MMQSb1czwvg + def image_urls + if url =~ %r!#{IMAGE}/small/([a-z0-9]+\.\w+)\z!i + return ["https://img.pawoo.net/media_attachments/files/#{$1}/original/#{$2}"] end + + if url =~ %r!#{IMAGE}/original/([a-z0-9]+\.\w+)\z!i + return [url] + end + + return api_response.image_urls + end + + # https://pawoo.net/@evazion/19451018 + # https://pawoo.net/web/statuses/19451018 + def page_url + [url, referer_url].each do |x| + if PawooApiClient::Status.is_match?(x) + return x + end + end + + return super + end + + # https://pawoo.net/@evazion + # https://pawoo.net/web/accounts/47806 + def profile_url + if url =~ PawooApiClient::PROFILE2 + return "https://pawoo.net/@#{$1}" + end + + api_response.profile_url + end + + def artist_name + api_response.account_name + end + + def artist_commentary_title + nil + end + + def artist_commentary_desc + api_response.commentary + end + + def tags + api_response.tags end def normalizable_for_artist_finder? true end - def normalize_for_artist_finder! - get - @profile_url || @url + def normalize_for_artist_finder + profile_url end def dtext_artist_commentary_desc @@ -68,5 +86,18 @@ module Sources::Strategies end end.strip end + + public + + def api_response + [url, referer_url].each do |x| + if client = PawooApiClient.new.get(x) + return client + end + end + + nil + end + memoize :api_response end end diff --git a/app/logical/sources/strategies/pixiv.rb b/app/logical/sources/strategies/pixiv.rb index 2b982dd7c..e880ecc64 100644 --- a/app/logical/sources/strategies/pixiv.rb +++ b/app/logical/sources/strategies/pixiv.rb @@ -1,122 +1,23 @@ -# encoding: UTF-8 - require 'csv' module Sources module Strategies class Pixiv < Base - attr_reader :zip_url, :ugoira_frame_data, :ugoira_content_type + MONIKER = %r!(?:[a-zA-Z0-9_-]+)! + PROFILE = %r!\Ahttps?://www\.pixiv\.net/member\.php\?id=[0-9]+\z! + EXT = %r!(?:jpg|jpeg|png|gif)!i - MONIKER = '(?:[a-zA-Z0-9_-]+)' - TIMESTAMP = '(?:[0-9]{4}/[0-9]{2}/[0-9]{2}/[0-9]{2}/[0-9]{2}/[0-9]{2})' - EXT = "(?:jpg|jpeg|png|gif)" + WEB = %r!(?:\A(?:https?://)?www\.pixiv\.net)! + I12 = %r!(?:\A(?:https?://)?i[0-9]+\.pixiv\.net)! + IMG = %r!(?:\A(?:https?://)?img[0-9]*\.pixiv\.net)! + PXIMG = %r!(?:\A(?:https?://)?i\.pximg\.net)! + TOUCH = %r!(?:\A(?:https?://)?touch\.pixiv\.net)! + NOVEL_PAGE = %r!(?:\Ahttps?://www\.pixiv\.net/novel/show\.php\?id=(\d+))! + FANBOX_IMAGE = %r!(?:\Ahttps?://fanbox\.pixiv\.net/images/post/(\d+))! + FANBOX_PAGE = %r!(?:\Ahttps?://www\.pixiv\.net/fanbox/creator/\d+/post/(\d+))! - WEB = '(?:\A(?:https?://)?www\.pixiv\.net)' - I12 = '(?:\A(?:https?://)?i[0-9]+\.pixiv\.net)' - IMG = '(?:\A(?:https?://)?img[0-9]*\.pixiv\.net)' - PXIMG = '(?:\A(?:https?://)?i\.pximg\.net)' - TOUCH = '(?:\A(?:https?://)?touch\.pixiv\.net)' - - def self.url_match?(url) - url =~ /#{WEB}|#{IMG}|#{I12}|#{TOUCH}|#{PXIMG}/i - end - - def referer_url - if @referer_url =~ /pixiv\.net\/member_illust.+mode=medium/ && @url =~ /#{IMG}|#{I12}/ - @referer_url - else - @url - end - end - - def site_name - "Pixiv" - end - - def unique_id - @pixiv_moniker - end - - def fake_referer - "http://www.pixiv.net" - end - - def normalized_for_artist_finder? - url =~ %r!\Ahttp://www\.pixiv\.net/member\.php\?id=[0-9]+\z/! - end - - def normalizable_for_artist_finder? - has_moniker? || sample_image? || full_image? || work_page? - end - - def normalize_for_artist_finder! - @illust_id = illust_id_from_url! - @metadata = get_metadata_from_papi(@illust_id) - - "http://www.pixiv.net/member.php?id=#{@metadata.user_id}/" - end - - def translate_tag(tag) - normalized_tag = tag.gsub(/\d+users入り\z/i, "") - - translated_tags = super(normalized_tag) - if translated_tags.empty? && normalized_tag.include?("/") - translated_tags = normalized_tag.split("/").flat_map { |tag| super(tag) } - end - - translated_tags - end - - def get - return unless illust_id_from_url - @illust_id = illust_id_from_url - @metadata = get_metadata_from_papi(@illust_id) - - page = agent.get(URI.parse(normalized_url)) - - if page.search("body.not-logged-in").any? - # Session cache is invalid, clear it and log in normally. - Cache.delete("pixiv-phpsessid") - @agent = nil - page = agent.get(URI.parse(normalized_url)) - end - - @artist_name = @metadata.name - @profile_url = "http://www.pixiv.net/member.php?id=#{@metadata.user_id}" - @pixiv_moniker = @metadata.moniker - @zip_url, @ugoira_frame_data, @ugoira_content_type = get_zip_url_from_api - @tags = @metadata.tags.map do |tag| - [tag, "https://www.pixiv.net/search.php?s_mode=s_tag_full&#{{word: tag}.to_param}"] - end - @page_count = @metadata.page_count - @artist_commentary_title = @metadata.artist_commentary_title - @artist_commentary_desc = @metadata.artist_commentary_desc - - is_manga = @page_count > 1 - - if !@zip_url - page = manga_page_from_url(@url).to_i - @image_url = image_urls[page] - end - end - - def rewrite_thumbnails(thumbnail_url, is_manga=nil) - thumbnail_url = rewrite_new_medium_images(thumbnail_url) - thumbnail_url = rewrite_medium_ugoiras(thumbnail_url) - thumbnail_url = rewrite_old_small_and_medium_images(thumbnail_url, is_manga) - return thumbnail_url - end - - def agent - @agent ||= PixivWebAgent.build - end - - def file_url - image_url || zip_url - end - - def image_urls - @metadata.pages + def self.match?(*urls) + urls.compact.any? { |x| x.match?(/#{WEB}|#{IMG}|#{I12}|#{TOUCH}|#{PXIMG}|#{FANBOX_IMAGE}/i) } end def self.to_dtext(text) @@ -137,18 +38,147 @@ module Sources DText.from_html(text) end - def illust_id_from_url - if sample_image? || full_image? || work_page? - illust_id_from_url! - else - nil + def site_name + "Pixiv" + end + + def image_urls + image_urls_sub. + map {|x| rewrite_cdn(x)} + rescue PixivApiClient::BadIDError + [url] + end + + def page_url + if novel_id.present? + return "https://www.pixiv.net/novel/show.php?id=#{novel_id}&mode=cover" end - rescue Sources::Error - raise if Rails.env.test? + + if fanbox_id.present? + return "https://www.pixiv.net/fanbox/creator/#{metadata.user_id}/post/#{fanbox_id}" + end + + if illust_id.present? + return "http://www.pixiv.net/member_illust.php?mode=medium&illust_id=#{illust_id}" + end + + return url + + rescue PixivApiClient::BadIDError + nil + end + + def canonical_url + return image_url + end + + def profile_url + [url, referer_url].each do |x| + if x =~ PROFILE + return x + end + end + + "https://www.pixiv.net/member.php?id=#{metadata.user_id}" + rescue PixivApiClient::BadIDError nil end - def illust_id_from_url! + def artist_name + metadata.name + rescue PixivApiClient::BadIDError + nil + end + + def artist_commentary_title + metadata.artist_commentary_title + rescue PixivApiClient::BadIDError + nil + end + + def artist_commentary_desc + metadata.artist_commentary_desc + rescue PixivApiClient::BadIDError + nil + end + + def headers + if fanbox_id.present? + # need the session to download fanbox images + return { + "Referer" => "https://www.pixiv.net/fanbox", + "Cookie" => HTTP::Cookie.cookie_value(agent.cookies) + } + end + + return { + "Referer" => "https://www.pixiv.net" + } + end + + def normalized_for_artist_finder? + url =~ PROFILE + end + + def normalizable_for_artist_finder? + illust_id.present? || novel_id.present? || fanbox_id.present? + end + + def unique_id + moniker + end + + def tags + metadata.tags.map do |tag| + [tag, "https://www.pixiv.net/search.php?s_mode=s_tag_full&#{{word: tag}.to_param}"] + end + rescue PixivApiClient::BadIDError + [] + end + memoize :tags + + def translate_tag(tag) + normalized_tag = tag.gsub(/\d+users入り\z/i, "") + translated_tags = super(normalized_tag) + + if translated_tags.empty? && normalized_tag.include?("/") + translated_tags = normalized_tag.split("/").flat_map { |tag| super(tag) } + end + + translated_tags + end + + public + + def image_urls_sub + # there's too much normalization bullshit we have to deal with + # raw urls, so just fetch the canonical url from the api every + # time. + + if manga_page.present? + return [metadata.pages[manga_page]] + end + + if metadata.pages.is_a?(Hash) + return [ugoira_zip_url] + end + + return metadata.pages + end + + def rewrite_cdn(x) + if x =~ %r{\Ahttps?:\/\/(?:\w+\.)?pixiv\.net\.edgesuite\.net} + return x.sub(".edgesuite.net", "") + end + + return x + end + + # in order to prevent recursive loops, this method should not make any + # api calls and only try to extract the illust_id from the url. therefore, + # even though it makes sense to reference page_url here, it will only look + # at (url, referer_url). + def illust_id # http://img18.pixiv.net/img/evazion/14901720.png # # http://i2.pixiv.net/img18/img/evazion/14901720.png @@ -165,228 +195,166 @@ module Sources # # http://i1.pixiv.net/img-zip-ugoira/img/2014/10/03/17/29/16/46323924_ugoira1920x1080.zip if url =~ %r!/(\d+)(?:_\w+)?\.(?:jpg|jpeg|png|gif|zip)!i - $1 - - # http://www.pixiv.net/member_illust.php?mode=medium&illust_id=18557054 - # http://www.pixiv.net/member_illust.php?mode=big&illust_id=18557054 - # http://www.pixiv.net/member_illust.php?mode=manga&illust_id=18557054 - # http://www.pixiv.net/member_illust.php?mode=manga_big&illust_id=18557054&page=1 - elsif url =~ /illust_id=(\d+)/i - $1 - - # http://www.pixiv.net/i/18557054 - elsif url =~ %r!pixiv\.net/i/(\d+)!i - $1 - - else - raise Sources::Error.new("Couldn't get illust ID from URL: #{url}") - end - end - - # http://i1.pixiv.net/c/600x600/img-master/img/2014/10/02/13/51/23/46304396_p1_master1200.jpg - # => http://i1.pixiv.net/img-original/img/2014/10/02/13/51/23/46304396_p1.png - # - # http://i.pximg.net/img-master/img/2014/05/15/23/53/59/43521009_p1_master1200.jpg - # => http://i.pximg.net/img-original/img/2014/05/15/23/53/59/43521009_p1.jpg - def rewrite_new_medium_images(thumbnail_url) - if thumbnail_url =~ %r!/c/\d+x\d+/img-master/img/#{TIMESTAMP}/\d+_p\d+_\w+\.jpg!i || - thumbnail_url =~ %r!/img-master/img/#{TIMESTAMP}/\d+_p\d+_\w+\.jpg!i - page = manga_page_from_url(@url).to_i - thumbnail_url = @metadata.pages[page] + return $1 end - thumbnail_url - end - - # http://i3.pixiv.net/img-zip-ugoira/img/2014/12/03/04/58/24/47378698_ugoira600x600.zip - # => http://i3.pixiv.net/img-zip-ugoira/img/2014/12/03/04/58/24/47378698_ugoira1920x1080.zip - def rewrite_medium_ugoiras(thumbnail_url) - if thumbnail_url =~ %r!/img-zip-ugoira/img/.*/\d+_ugoira600x600.zip!i - thumbnail_url = thumbnail_url.sub("_ugoira600x600.zip", "_ugoira1920x1080.zip") - end - - thumbnail_url - end - - # If the thumbnail is for a manga gallery, it needs to be rewritten like this: - # - # http://i2.pixiv.net/img18/img/evazion/14901720_m.png - # => http://i2.pixiv.net/img18/img/evazion/14901720_big_p0.png - # - # Otherwise, it needs to be rewritten like this: - # - # http://i2.pixiv.net/img18/img/evazion/14901720_m.png - # => http://i2.pixiv.net/img18/img/evazion/14901720.png - # - def rewrite_old_small_and_medium_images(thumbnail_url, is_manga) - if thumbnail_url =~ %r!/img/#{MONIKER}/\d+_[ms]\.#{EXT}!i - if is_manga.nil? - page_count = @metadata.page_count - is_manga = page_count > 1 + [url, referer_url].each do |x| + # http://www.pixiv.net/member_illust.php?mode=medium&illust_id=18557054 + # http://www.pixiv.net/member_illust.php?mode=big&illust_id=18557054 + # http://www.pixiv.net/member_illust.php?mode=manga&illust_id=18557054 + # http://www.pixiv.net/member_illust.php?mode=manga_big&illust_id=18557054&page=1 + if x =~ /illust_id=(\d+)/i + return $1 end - if is_manga - page = manga_page_from_url(@url) - return thumbnail_url.sub(/_[ms]\./, "_big_p#{page}.") - else - return thumbnail_url.sub(/_[ms]\./, ".") + # http://www.pixiv.net/i/18557054 + if x =~ %r!pixiv\.net/i/(\d+)!i + return $1 end end - return thumbnail_url + raise Sources::Error.new("Couldn't get illust ID from URL (#{url}, #{referer_url})") + end + memoize :illust_id + + def novel_id + [url, referer_url].each do |x| + if x =~ NOVEL_PAGE + return $1 + end + end + + return nil + end + memoize :novel_id + + def fanbox_id + [url, referer_url].each do |x| + if x =~ FANBOX_PAGE + return $1 + end + + if x =~ FANBOX_IMAGE + return $1 + end + end + + return nil + end + memoize :fanbox_id + + def agent + PixivWebAgent.build + end + memoize :agent + + def page + agent.get(URI.parse(page_url)) + + if page.search("body.not-logged-in").any? + # Session cache is invalid, clear it and log in normally. + Cache.delete("pixiv-phpsessid") + @agent = nil + page = agent.get(URI.parse(page_url)) + end + + page + end + memoize :page + + def metadata + if novel_id.present? + return PixivApiClient.new.novel(novel_id) + end + + if fanbox_id.present? + return PixivApiClient.new.fanbox(fanbox_id) + end + + return PixivApiClient.new.work(illust_id) + end + memoize :metadata + + def moniker + # we can sometimes get the moniker from the url + if url =~ %r!#{IMG}/img/(#{MONIKER})!i + return $1 + end + + if url =~ %r!#{I12}/img[0-9]+/img/(#{MONIKER})!i + return $1 + end + + if url =~ %r!#{WEB}/stacc/(#{MONIKER})/?$!i + return $1 + end + + return metadata.moniker + end + memoize :moniker + + def page_count + metadata.page_count end - def manga_page_from_url(url) + def data + return { + ugoira_frame_data: ugoira_frame_data + } + end + + def ugoira_zip_url + if metadata.pages.is_a?(Hash) && metadata.pages["ugoira600x600"] + return metadata.pages["ugoira600x600"].sub("_ugoira600x600.zip", "_ugoira1920x1080.zip") + end + end + memoize :ugoira_zip_url + + def ugoira_frame_data + return metadata.json.dig("metadata", "frames") + end + memoize :ugoira_frame_data + + def ugoira_content_type + case metadata.json["image_urls"].to_s + when /\.jpg/ + return "image/jpeg" + + when /\.png/ + return "image/png" + + when /\.gif/ + return "image/gif" + end + + raise Sources::Error.new("content type not found for (#{url}, #{referer_url})") + end + memoize :ugoira_content_type + + def is_manga? + page_count > 1 + end + + # Returns the current page number of the manga. This will not + # make any api calls and only looks at (url, referer_url). + def manga_page # http://i2.pixiv.net/img04/img/syounen_no_uta/46170939_p0.jpg # http://i1.pixiv.net/c/600x600/img-master/img/2014/09/24/23/25/08/46168376_p0_master1200.jpg # http://i1.pixiv.net/img-original/img/2014/09/25/23/09/29/46183440_p0.jpg if url =~ %r!/\d+_p(\d+)(?:_\w+)?\.#{EXT}!i - $1 + return $1.to_i + end # http://www.pixiv.net/member_illust.php?mode=manga_big&illust_id=46170939&page=0 - elsif url =~ /page=(\d+)/i - $1 - - else - 0 - end - end - - def get_moniker_from_url - case url - when %r!#{IMG}/img/(#{MONIKER})!i - $1 - when %r!#{I12}/img[0-9]+/img/(#{MONIKER})!i - $1 - when %r!#{WEB}/stacc/(#{MONIKER})/?$!i - $1 - else - false - end - end - - def has_moniker? - get_moniker_from_url != false - end - - def get_image_url_from_page(page, is_manga) - if is_manga - elements = page.search("div.works_display a img").find_all do |node| - node["src"] !~ /source\.pixiv\.net/ + [url, referer_url].each do |x| + if x =~ /page=(\d+)/i + return $1.to_i end - else - elements = page.search("div.works_display div img.big") - elements = page.search("div.works_display div img") if elements.empty? end - if elements.any? - element = elements.first - thumbnail_url = element.attr("src") || element.attr("data-src") - return rewrite_thumbnails(thumbnail_url, is_manga) - end - - if page.body =~ /"original":"(https:.+?)"/ - return $1.gsub(/\\\//, '/') - end - end - - def get_zip_url_from_api - if @metadata.pages.is_a?(Hash) && @metadata.pages["ugoira600x600"] - zip_url = @metadata.pages["ugoira600x600"].sub("_ugoira600x600.zip", "_ugoira1920x1080.zip") - frame_data = @metadata.json["metadata"]["frames"] - content_type = nil - - case @metadata.json["image_urls"].to_s - when /\.jpg/ - content_type = "image/jpeg" - - when /\.png/ - content_type = "image/png" - - when /\.gif/ - content_type = "image/gif" - end - - return [zip_url, frame_data, content_type] - end - end - - def get_zip_url_from_page(page) - scripts = page.search("body script").find_all do |node| - node.text =~ /_ugoira600x600\.zip/ - end - - if scripts.any? - javascript = scripts.first.text - - json = javascript.match(/;pixiv\.context\.ugokuIllustData\s+=\s+(\{.+?\});(?:$|pixiv\.context)/)[1] - data = JSON.parse(json) - zip_url = data["src"].sub("_ugoira600x600.zip", "_ugoira1920x1080.zip") - frame_data = data["frames"] - content_type = data["mime_type"] - - return [zip_url, frame_data, content_type] - end - end - - def normalized_url - "http://www.pixiv.net/member_illust.php?mode=medium&illust_id=#{@illust_id}" - end - - def get_metadata_from_papi(illust_id) - @metadata ||= PixivApiClient.new.works(illust_id) - end - - def work_page? - return true if url =~ %r!(?:#{WEB}|#{TOUCH})/member_illust\.php! && url =~ %r!mode=(?:medium|big|manga|manga_big)! && url =~ %r!illust_id=\d+! - return true if url =~ %r!(?:#{WEB}|#{TOUCH})/i/\d+$!i - return false - end - - def full_image? - # http://img18.pixiv.net/img/evazion/14901720.png?1234 - return true if url =~ %r!#{IMG}/img/#{MONIKER}/\d+(?:_big_p\d+)?\.#{EXT}!i - - # http://i2.pixiv.net/img18/img/evazion/14901720.png - # http://i1.pixiv.net/img07/img/pasirism/18557054_big_p1.png - return true if url =~ %r!#{I12}/img\d+/img/#{MONIKER}/\d+(?:_big_p\d+)?\.#{EXT}!i - - # http://i1.pixiv.net/img-original/img/2014/10/02/13/51/23/46304396_p0.png - return true if url =~ %r!#{I12}/img-original/img/#{TIMESTAMP}/\d+_p\d+\.#{EXT}$!i - - # http://i.pximg.net/img-original/img/2017/03/22/17/40/51/62041488_p0.jpg - return true if url =~ %r!#{PXIMG}/img-original/img/#{TIMESTAMP}/\d+_\w+\.#{EXT}!i - - # http://i1.pixiv.net/img-zip-ugoira/img/2014/10/03/17/29/16/46323924_ugoira1920x1080.zip - return true if url =~ %r!(#{I12}|#{PXIMG})/img-zip-ugoira/img/#{TIMESTAMP}/\d+_ugoira\d+x\d+\.zip$!i - - return false - end - - def sample_image? - # http://img18.pixiv.net/img/evazion/14901720_m.png - return true if url =~ %r!#{IMG}/img/#{MONIKER}/\d+_(?:[sm]|p\d+)\.#{EXT}!i - - # http://i2.pixiv.net/img18/img/evazion/14901720_m.png - # http://i1.pixiv.net/img07/img/pasirism/18557054_p1.png - return true if url =~ %r!#{I12}/img\d+/img/#{MONIKER}/\d+_(?:[sm]|p\d+)\.#{EXT}!i - - # http://i1.pixiv.net/c/600x600/img-master/img/2014/10/02/13/51/23/46304396_p0_master1200.jpg - # http://i2.pixiv.net/c/64x64/img-master/img/2014/10/09/12/59/50/46441917_square1200.jpg - return true if url =~ %r!#{I12}/c/\d+x\d+/img-master/img/#{TIMESTAMP}/\d+_\w+\.#{EXT}$!i - - # http://i.pximg.net/img-master/img/2014/05/15/23/53/59/43521009_p1_master1200.jpg - return true if url =~ %r!#{PXIMG}/img-master/img/#{TIMESTAMP}/\d+_\w+\.#{EXT}!i - - # http://i.pximg.net/c/600x600/img-master/img/2017/03/22/17/40/51/62041488_p0_master1200.jpg - return true if url =~ %r!#{PXIMG}/c/\d+x\d+/img-master/img/#{TIMESTAMP}/\d+_\w+\.#{EXT}!i - - # http://i1.pixiv.net/img-inf/img/2011/05/01/23/28/04/18557054_s.png - # http://i2.pixiv.net/img-inf/img/2010/11/30/08/54/06/14901765_64x64.jpg - return true if url =~ %r!#{I12}/img-inf/img/#{TIMESTAMP}/\d+_\w+\.#{EXT}!i - - return false + return nil end + memoize :manga_page end end end diff --git a/app/logical/sources/strategies/tumblr.rb b/app/logical/sources/strategies/tumblr.rb index d8a769567..c1f48b51e 100644 --- a/app/logical/sources/strategies/tumblr.rb +++ b/app/logical/sources/strategies/tumblr.rb @@ -1,28 +1,52 @@ module Sources::Strategies class Tumblr < Base - extend Memoist + DOMAIN = %r{(data|(\d+\.)?media)\.tumblr\.com} + MD5 = %r{(?[0-9a-f]{32})}i + FILENAME = %r{(?(tumblr_(inline_)?)?[a-z0-9]+(_r[0-9]+)?)}i + SIZES = %r{(?:250|400|500|500h|540|1280|raw)}i + EXT = %r{(?\w+)} + IMAGE = %r!\Ahttps?://#{DOMAIN}/(?#{MD5}/)?#{FILENAME}_#{SIZES}\.#{EXT}\z!i + POST = %r!\Ahttps?://(?[^.]+)\.tumblr\.com/(?:post|image)/(?\d+)!i - def self.url_match?(url) - blog_name, post_id = parse_info_from_url(url) - blog_name.present? && post_id.present? + def self.match?(*urls) + urls.compact.any? do |url| + blog_name, post_id = parse_info_from_url(url) + url =~ IMAGE || blog_name.present? && post_id.present? + end end - def referer_url - blog_name, post_id = self.class.parse_info_from_url(normalized_url) - "https://#{blog_name}.tumblr.com/post/#{post_id}" - end - - def tags - post[:tags].map do |tag| - # normalize tags: space, underscore, and hyphen are equivalent in tumblr tags. - [tag.tr(" _-", "_"), "https://tumblr.com/tagged/#{CGI::escape(tag.tr(" _-", "-"))}"] - end.uniq + def self.parse_info_from_url(url) + if url =~ POST + [$~[:blog_name], $~[:post_id]] + else + [] + end end def site_name "Tumblr" end + def image_urls + image_urls_sub + .uniq + .map {|x| normalize_cdn(x)} + .map {|x| find_largest(x)} + .compact + .uniq + end + + def page_url + [url, referer_url].each do |x| + if x =~ POST + blog_name, post_id = self.class.parse_info_from_url(x) + return "https://#{blog_name}.tumblr.com/post/#{post_id}" + end + end + + return super + end + def profile_url "https://#{artist_name}.tumblr.com/" end @@ -35,8 +59,10 @@ module Sources::Strategies case post[:type] when "text", "link" post[:title] + when "answer" "#{post[:asking_name]} asked: #{post[:question]}" + else nil end @@ -46,94 +72,133 @@ module Sources::Strategies case post[:type] when "text" post[:body] + when "link" post[:description] + when "photo", "video" post[:caption] + when "answer" post[:answer] + else nil end end + def tags + post[:tags].map do |tag| + # normalize tags: space, underscore, and hyphen are equivalent in tumblr tags. + etag = tag.gsub(/[ _-]/, "_") + [etag, "https://tumblr.com/tagged/#{CGI.escape(etag)}"] + end.uniq + end + memoize :tags + def dtext_artist_commentary_desc DText.from_html(artist_commentary_desc).strip end - def image_url - image_urls.first - end + public - def image_urls - urls = case post[:type] - when "photo" - post[:photos].map do |photo| - self.class.normalize_image_url(photo[:original_size][:url]) - end - when "video" - [post[:video_url]] - else - [] + def image_urls_sub + list = [] + + if url =~ IMAGE + list << url end - urls += self.class.parse_inline_images(artist_commentary_desc) - urls - end + if page_url !~ POST + return list + end - def get - end - - module HelperMethods - extend ActiveSupport::Concern - - module ClassMethods - def parse_info_from_url(url) - url =~ %r!\Ahttps?://(?[^.]+)\.tumblr\.com/(?:post|image)/(?\d+)!i - [$1, $2] - end - - def parse_inline_images(text) - html = Nokogiri::HTML.fragment(text) - image_urls = html.css("img").map { |node| node["src"] } - image_urls = image_urls.map(&method(:normalize_image_url)) - image_urls - end - - def normalize_image_url(url) - url, _, _ = Downloads::RewriteStrategies::Tumblr.new.rewrite(url, {}) - url + if post[:type] == "photo" + list += post[:photos].map do |photo| + photo[:original_size][:url] end end - def normalized_url - if self.class.url_match?(@referer_url) - @referer_url - elsif self.class.url_match?(@url) - @url + if post[:type] == "video" + list << post[:video_url] + end + + if inline_images.any? + list += inline_images.to_a + end + + if list.any? + return list + end + + raise "image url not found for (#{url}, #{referer_url})" + end + + # Normalize cdn subdomains. + # + # https://gs1.wac.edgecastcdn.net/8019B6/data.tumblr.com/tumblr_m2dxb8aOJi1rop2v0o1_500.png + # => http://data.tumblr.com/tumblr_m2dxb8aOJi1rop2v0o1_500.png + def normalize_cdn(x) + # does this work? + x.sub(%r!\Ahttps?://gs1\.wac\.edgecastcdn\.net/8019B6/media\.tumblr\.com!i, "http://media.tumblr.com") + end + + # Look for the biggest available version on media.tumblr.com. A bigger + # version may or may not exist. + # + # https://40.media.tumblr.com/de018501416a465d898d24ad81d76358/tumblr_nfxt7voWDX1rsd4umo1_r23_1280.jpg + # => https://media.tumblr.com/de018501416a465d898d24ad81d76358/tumblr_nfxt7voWDX1rsd4umo1_r23_raw.jpg + # + # https://68.media.tumblr.com/ee02048f5578595badc95905e17154b4/tumblr_inline_ofbr4452601sk4jd9_250.gif + # => https://media.tumblr.com/ee02048f5578595badc95905e17154b4/tumblr_inline_ofbr4452601sk4jd9_500.gif + # + # https://25.media.tumblr.com/tumblr_m2dxb8aOJi1rop2v0o1_500.png + # => https://media.tumblr.com/tumblr_m2dxb8aOJi1rop2v0o1_1280.png + # + # http://media.tumblr.com/0DNBGJovY5j3smfeQs8nB53z_400.jpg + # => https://media.tumblr.com/0DNBGJovY5j3smfeQs8nB53z_500.jpg + # + # http://media.tumblr.com/tumblr_m24kbxqKAX1rszquso1_1280.jpg + # => https://media.tumblr.com/tumblr_m24kbxqKAX1rszquso1_1280.jpg + def find_largest(x) + if x =~ IMAGE + sizes = [1280, 640, 540, "500h", 500, 400, 250] + candidates = sizes.map do |size| + "https://media.tumblr.com/#{$~[:dir]}#{$~[:filename]}_#{size}.#{$~[:ext]}" + end + + return candidates.find do |candidate| + http_exists?(candidate, headers) end end + + return x end - module ApiMethods - def client - raise NotImplementedError.new("Tumblr support is not available (API key not configured).") if Danbooru.config.tumblr_consumer_key.nil? - ::TumblrApiClient.new(Danbooru.config.tumblr_consumer_key) - end - - def api_response - blog_name, post_id = self.class.parse_info_from_url(normalized_url) - client.posts(blog_name, post_id) - end - - def post - api_response[:posts].first - end + def inline_images + html = Nokogiri::HTML.fragment(artist_commentary_desc) + html.css("img").map { |node| node["src"] } end + memoize :inline_images - include ApiMethods - include HelperMethods + def client + raise NotImplementedError.new("Tumblr support is not available (API key not configured).") if Danbooru.config.tumblr_consumer_key.nil? - memoize :client, :api_response + TumblrApiClient.new(Danbooru.config.tumblr_consumer_key) + end + memoize :client + + def api_response + blog_name, post_id = self.class.parse_info_from_url(page_url) + + raise "Page url not found for (#{url}, #{referer_url})" if blog_name.nil? + + client.posts(blog_name, post_id) + end + memoize :api_response + + def post + api_response[:posts].first + end end end diff --git a/app/logical/sources/strategies/twitter.rb b/app/logical/sources/strategies/twitter.rb index 3758e28c1..1e66f70d4 100644 --- a/app/logical/sources/strategies/twitter.rb +++ b/app/logical/sources/strategies/twitter.rb @@ -1,52 +1,94 @@ module Sources::Strategies class Twitter < Base - attr_reader :image_urls + PAGE = %r!\Ahttps?://(?:mobile\.)?twitter\.com!i + ASSET = %r!\A(https?://(?:video|pbs)\.twimg\.com/media/)}!i - def self.url_match?(url) - self.status_id_from_url(url).present? + def self.match?(*urls) + urls.compact.any? { |x| x =~ PAGE || x =~ ASSET} end - def referer_url - normalized_url - end + # https://twitter.com/i/web/status/943446161586733056 + # https://twitter.com/motty08111213/status/943446161586733056 + def self.status_id_from_url(url) + if url =~ %r{\Ahttps?://(?:mobile\.)?twitter\.com/(?:i/web|\w+)/status/(\d+)}i + return $1 + end - def normalized_url - "https://twitter.com/#{artist_name}/status/#{status_id}" - end - - def artist_name - api_response.attrs[:user][:screen_name] + return nil end def site_name "Twitter" end - def api_response - @api_response ||= TwitterService.new.client.status(status_id, tweet_mode: "extended") - end - - def get - attrs = api_response.attrs - @profile_url = "https://twitter.com/" + attrs[:user][:screen_name] - @image_urls = TwitterService.new.image_urls(api_response) - @image_url = @image_urls.first - @artist_commentary_title = "" - @artist_commentary_desc = attrs[:full_text] - @tags = attrs[:entities][:hashtags].map do |text:, indices:| - [text, "https://twitter.com/hashtag/#{text}"] + def image_urls + if url =~ /(#{ASSET}[^:]+)/ + return [$1 + ":orig" ] end - rescue ::Twitter::Error::Forbidden + + [url, referer_url].each do |x| + if x =~ PAGE + return service.image_urls(api_response) + end + end + rescue Twitter::Error::NotFound + url + end + memoize :image_urls + + def page_url + [url, referer_url].each do |x| + if self.class.status_id_from_url(x).present? + return x + end + end + + return super end - def normalize_for_artist_finder! - url.downcase + def profile_url + if url =~ %r{\Ahttps?://(?:mobile\.)?twitter\.com/(\w+)}i + if $1 != "i" + return "https://twitter.com/#{$1}" + end + end + + "https://twitter.com/" + api_response.attrs[:user][:screen_name] + rescue Twitter::Error::NotFound + nil + end + + def artist_name + api_response.attrs[:user][:screen_name] + rescue Twitter::Error::NotFound + nil + end + + def artist_commentary_title + "" + end + + def artist_commentary_desc + api_response.attrs[:full_text] + rescue Twitter::Error::NotFound + nil end def normalizable_for_artist_finder? - true + url =~ PAGE end + def normalize_for_artist_finder + profile_url.downcase + end + + def tags + api_response.attrs[:entities][:hashtags].map do |text:, indices:| + [text, "https://twitter.com/hashtag/#{text}"] + end + end + memoize :tags + def dtext_artist_commentary_desc url_replacements = api_response.urls.map do |obj| [obj.url.to_s, obj.expanded_url.to_s] @@ -63,19 +105,23 @@ module Sources::Strategies desc = desc.gsub(%r!@([a-zA-Z0-9_]+)!, '"@\\1":[https://twitter.com/\\1]') desc.strip end + memoize :dtext_artist_commentary_desc + + public + + def service + TwitterService.new + end + memoize :service + + def api_response + service.client.status(status_id, tweet_mode: "extended") + end + memoize :api_response def status_id - self.class.status_id_from_url(@url) || self.class.status_id_from_url(@referer_url) - end - - # https://twitter.com/i/web/status/943446161586733056 - # https://twitter.com/motty08111213/status/943446161586733056 - def self.status_id_from_url(url) - if url =~ %r{\Ahttps?://(?:mobile\.)?twitter\.com/(?:i/web|\w+)/status/(\d+)}i - $1 - else - nil - end + [url, referer_url].map {|x| self.class.status_id_from_url(x)}.compact.first end + memoize :status_id end end diff --git a/app/logical/twitter_service.rb b/app/logical/twitter_service.rb index c57096db3..08babbb81 100644 --- a/app/logical/twitter_service.rb +++ b/app/logical/twitter_service.rb @@ -1,21 +1,22 @@ class TwitterService + extend Memoist + def client raise "Twitter API keys not set" if Danbooru.config.twitter_api_key.nil? - @client ||= begin - rest_client = ::Twitter::REST::Client.new do |config| - config.consumer_key = Danbooru.config.twitter_api_key - config.consumer_secret = Danbooru.config.twitter_api_secret - if bearer_token = Cache.get("twitter-api-token") - config.bearer_token = bearer_token - end + rest_client = ::Twitter::REST::Client.new do |config| + config.consumer_key = Danbooru.config.twitter_api_key + config.consumer_secret = Danbooru.config.twitter_api_secret + if bearer_token = Cache.get("twitter-api-token") + config.bearer_token = bearer_token end - - Cache.put("twitter-api-token", rest_client.bearer_token) - - rest_client end + + Cache.put("twitter-api-token", rest_client.bearer_token) + + rest_client end + memoize :client def extract_urls_for_status(tweet) tweet.media.map do |obj| diff --git a/app/logical/upload_service.rb b/app/logical/upload_service.rb index 1a6804706..76d99101f 100644 --- a/app/logical/upload_service.rb +++ b/app/logical/upload_service.rb @@ -49,7 +49,7 @@ class UploadService @upload.update(status: "processing") if @upload.file.nil? && Utils.is_downloadable?(source) - @upload.file = Utils.download_for_upload(source, @upload) + @upload.file = Utils.download_for_upload(@upload) end if @upload.file.present? @@ -111,7 +111,9 @@ class UploadService p.image_width = upload.image_width p.image_height = upload.image_height p.rating = upload.rating - p.source = upload.source + if upload.source.present? + p.source = Sources::Strategies.find(upload.source, upload.referer_url).canonical_url + end p.file_size = upload.file_size p.uploader_id = upload.uploader_id p.uploader_ip_addr = upload.uploader_ip_addr diff --git a/app/logical/upload_service/controller_helper.rb b/app/logical/upload_service/controller_helper.rb index a456e6b44..d401be88b 100644 --- a/app/logical/upload_service/controller_helper.rb +++ b/app/logical/upload_service/controller_helper.rb @@ -4,13 +4,8 @@ class UploadService upload = Upload.new if Utils.is_downloadable?(url) && file.nil? - download = Downloads::File.new(url) - normalized_url = download.rewrite_url() - post = if normalized_url.nil? - Post.where("SourcePattern(lower(posts.source)) = ?", url).first - else - Post.where("SourcePattern(lower(posts.source)) IN (?)", [url, normalized_url]).first - end + strategy = Sources::Strategies.find(url, ref) + post = Post.where("SourcePattern(lower(posts.source)) IN (?)", [url, strategy.canonical_url]).first if post.nil? # this gets called from UploadsController#new so we need @@ -19,13 +14,15 @@ class UploadService end begin - source = Sources::Site.new(url, :referer_url => ref) + download = Downloads::File.new(url, ref) remote_size = download.size rescue Exception end - return [upload, post, source, normalized_url, remote_size] - elsif file + return [upload, post, strategy, remote_size] + end + + if file # this gets called via XHR so we can process sync Preprocessor.new(file: file).delayed_start(CurrentUser.id) end @@ -35,9 +32,7 @@ class UploadService def self.batch(url, ref = nil) if url - source = Sources::Site.new(url, :referer_url => ref) - source.get - return source + return Sources::Strategies.find(url, ref) end end end diff --git a/app/logical/upload_service/preprocessor.rb b/app/logical/upload_service/preprocessor.rb index 0889f29a5..b1ebaab49 100644 --- a/app/logical/upload_service/preprocessor.rb +++ b/app/logical/upload_service/preprocessor.rb @@ -1,5 +1,7 @@ class UploadService class Preprocessor + extend Memoist + attr_reader :params, :original_post_id def initialize(params) @@ -15,31 +17,40 @@ class UploadService params[:md5_confirmation] end - def referer + def referer_url params[:referer_url] end - def normalized_source - @normalized_source ||= begin - Downloads::File.new(params[:source]).rewrite_url - end + def strategy + Sources::Strategies.find(source, referer_url) end + memoize :strategy + + # When searching posts we have to use the canonical source + def canonical_source + strategy.canonical_url + end + memoize :canonical_source def in_progress? if Utils.is_downloadable?(source) - Upload.where(status: "preprocessing", source: normalized_source).or(Upload.where(status: "preprocessing", alt_source: normalized_source)).exists? - elsif md5.present? - Upload.where(status: "preprocessing", md5: md5).exists? - else - false + return Upload.where(status: "preprocessing", source: source).exists? end + + if md5.present? + return Upload.where(status: "preprocessing", md5: md5).exists? + end + + false end def predecessor if Utils.is_downloadable?(source) - Upload.where(status: ["preprocessed", "preprocessing"]).where(source: normalized_source).or(Upload.where(status: ["preprocessed", "preprocessing"], alt_source: normalized_source)).first - elsif md5.present? - Upload.where(status: ["preprocessed", "preprocessing"], md5: md5).first + return Upload.where(status: ["preprocessed", "preprocessing"], source: source).first + end + + if md5.present? + return Upload.where(status: ["preprocessed", "preprocessing"], md5: md5).first end end @@ -59,34 +70,31 @@ class UploadService def start! if Utils.is_downloadable?(source) CurrentUser.as_system do - if Post.tag_match("source:#{normalized_source}").where.not(id: original_post_id).exists? - raise ActiveRecord::RecordNotUnique.new("A post with source #{normalized_source} already exists") + if Post.tag_match("source:#{canonical_source}").where.not(id: original_post_id).exists? + raise ActiveRecord::RecordNotUnique.new("A post with source #{canonical_source} already exists") end end - if Upload.where(source: normalized_source, status: "completed").exists? - raise ActiveRecord::RecordNotUnique.new("A completed upload with source #{normalized_source} already exists") + if Upload.where(source: source, status: "completed").exists? + raise ActiveRecord::RecordNotUnique.new("A completed upload with source #{source} already exists") end - if Upload.where(source: normalized_source).where("status like ?", "error%").exists? - raise ActiveRecord::RecordNotUnique.new("An errored upload with source #{normalized_source} already exists") + if Upload.where(source: source).where("status like ?", "error%").exists? + raise ActiveRecord::RecordNotUnique.new("An errored upload with source #{source} already exists") end end params[:rating] ||= "q" params[:tag_string] ||= "tagme" - upload = Upload.create!(params) + begin upload.update(status: "preprocessing") - if Utils.is_downloadable?(source) - # preserve the original source (for twitter, the twimg:orig - # source, while the status url is stored in upload.source) - upload.alt_source = normalized_source - file = Utils.download_for_upload(source, upload) - elsif params[:file].present? + if params[:file].present? file = params[:file] + elsif Utils.is_downloadable?(source) + file = Utils.download_for_upload(upload) end Utils.process_file(upload, file, original_post_id: original_post_id) @@ -109,10 +117,7 @@ class UploadService # goto whoever submitted the form pred.initialize_attributes - # we went through a lot of trouble normalizing the source, - # so don't overwrite it with whatever the user provided - pred.source = "" if pred.source.nil? - pred.attributes = self.params.except(:source) + pred.attributes = self.params # if a file was uploaded after the preprocessing occurred, # then process the file and overwrite whatever the preprocessor diff --git a/app/logical/upload_service/replacer.rb b/app/logical/upload_service/replacer.rb index f1f6a8961..1ae034f56 100644 --- a/app/logical/upload_service/replacer.rb +++ b/app/logical/upload_service/replacer.rb @@ -74,8 +74,8 @@ class UploadService if replacement.replacement_file.present? replacement.replacement_url = "file://#{replacement.replacement_file.original_filename}" - elsif upload.downloaded_source.present? - replacement.replacement_url = upload.downloaded_source + elsif upload.source.present? + replacement.replacement_url = Sources::Strategies.canonical(upload.source, upload.referer_url) end if md5_changed @@ -93,7 +93,7 @@ class UploadService post.image_width = upload.image_width post.image_height = upload.image_height post.file_size = upload.file_size - post.source = upload.downloaded_source || upload.source + post.source = Sources::Strategies.canonical(upload.source, upload.referer_url) post.tag_string = upload.tag_string update_ugoira_frame_data(post, upload) diff --git a/app/logical/upload_service/utils.rb b/app/logical/upload_service/utils.rb index 9e4a86c62..f3781b384 100644 --- a/app/logical/upload_service/utils.rb +++ b/app/logical/upload_service/utils.rb @@ -200,37 +200,19 @@ class UploadService tags.join(" ") end - def download_from_source(source, referer_url: nil) - download = Downloads::File.new(source, referer_url: referer_url) - - file = download.download! - context = { - downloaded_source: download.downloaded_source, - source: download.source - } + def download_for_upload(upload) + download = Downloads::File.new(upload.source, upload.referer_url) + file, strategy = download.download! - if download.data[:is_ugoira] - context[:ugoira] = { - frame_data: download.data[:ugoira_frame_data], - content_type: download.data[:ugoira_content_type] + if download.data[:ugoira_frame_data] + upload.context = { + "ugoira" => { + "frame_data" => download.data[:ugoira_frame_data], + "content_type" => "image/jpeg" + } } end - yield(context) - - return file - end - - def download_for_upload(source, upload) - file = download_from_source(source, referer_url: upload.referer_url) do |context| - upload.downloaded_source = context[:downloaded_source] - upload.source = context[:source] - - if context[:ugoira] - upload.context = { ugoira: context[:ugoira] } - end - end - return file end end diff --git a/app/models/artist.rb b/app/models/artist.rb index d224a8fcf..0ef989b19 100644 --- a/app/models/artist.rb +++ b/app/models/artist.rb @@ -152,7 +152,7 @@ class Artist < ApplicationRecord url = ArtistUrl.normalize(url) artists = [] - # return [] unless Sources::Site.new(url).normalized_for_artist_finder? + # return [] unless Sources::Strategies.find(url).normalized_for_artist_finder? while artists.empty? && url.size > 10 u = url.sub(/\/+$/, "") + "/" @@ -481,13 +481,8 @@ class Artist < ApplicationRecord end def search_for_profile(url) - source = Sources::Site.new(url) - if source.strategy - source.get - find_all_by_url(source.profile_url) - else - nil - end + source = Sources::Strategies.find(url) + find_all_by_url(source.profile_url) rescue Net::OpenTimeout, PixivApiClient::Error raise if Rails.env.test? nil diff --git a/app/models/artist_url.rb b/app/models/artist_url.rb index 851d6c8a8..88c01cfce 100644 --- a/app/models/artist_url.rb +++ b/app/models/artist_url.rb @@ -22,7 +22,7 @@ class ArtistUrl < ApplicationRecord url = url.sub(%r!^http://blog\d+\.fc2!, "http://blog.fc2") url = url.sub(%r!^http://blog-imgs-\d+\.fc2!, "http://blog.fc2") url = url.sub(%r!^http://blog-imgs-\d+-\w+\.fc2!, "http://blog.fc2") - url = url.sub(%r!^(http://seiga.nicovideo.jp/user/illust/\d+)\?.+!, '\1/') + # url = url.sub(%r!^(http://seiga.nicovideo.jp/user/illust/\d+)\?.+!, '\1/') url = url.sub(%r!^http://pictures.hentai-foundry.com//!, "http://pictures.hentai-foundry.com/") if url !~ %r{\Ahttps?://(?:fc|th|pre|orig|img|www)\.} url = url.sub(%r{\Ahttps?://(.+?)\.deviantart\.com(.*)}, 'http://www.deviantart.com/\1\2') @@ -30,13 +30,17 @@ class ArtistUrl < ApplicationRecord # the strategy won't always work for twitter because it looks for a status url = url.downcase if url =~ %r!^https?://(?:mobile\.)?twitter\.com! - + begin - url = Sources::Site.new(url).normalize_for_artist_finder! + source = Sources::Strategies.find(url) + + if !source.normalized_for_artist_finder? && source.normalizable_for_artist_finder? + url = source.normalize_for_artist_finder + end rescue Net::OpenTimeout, PixivApiClient::Error raise if Rails.env.test? - rescue Sources::Site::NoStrategyError end + url = url.gsub(/\/+\Z/, "") url = url.gsub(%r!^https://!, "http://") url + "/" @@ -102,10 +106,6 @@ class ArtistUrl < ApplicationRecord end def normalize - if !Sources::Site.new(normalized_url).normalized_for_artist_finder? - self.normalized_url = self.class.normalize(url) - end - rescue Sources::Site::NoStrategyError self.normalized_url = self.class.normalize(url) end diff --git a/app/models/post.rb b/app/models/post.rb index 766beeef6..ba62c8c83 100644 --- a/app/models/post.rb +++ b/app/models/post.rb @@ -1682,7 +1682,11 @@ class Post < ApplicationRecord module PixivMethods def parse_pixiv_id - self.pixiv_id = Sources::Strategies::Pixiv.new(source).illust_id_from_url + self.pixiv_id = nil + + if Sources::Strategies::Pixiv.match?(source) + self.pixiv_id = Sources::Strategies::Pixiv.new(source).illust_id + end end end @@ -1790,10 +1794,8 @@ class Post < ApplicationRecord return if has_tag?("artist_request") || has_tag?("official_art") return if tags.any? { |t| t.category == Tag.categories.artist } - site = Sources::Site.new(source) + site = Sources::Strategies.find(source) self.warnings[:base] << "Artist tag is required. Create a new tag with [[artist:]]. Ask on the forum if you need naming help" - rescue Sources::Site::NoStrategyError => e - # unrecognized source; do nothing. end def has_copyright_tag diff --git a/app/models/upload.rb b/app/models/upload.rb index 1a59188db..fc0f7e3e3 100644 --- a/app/models/upload.rb +++ b/app/models/upload.rb @@ -47,8 +47,7 @@ class Upload < ApplicationRecord end - attr_accessor :as_pending, - :referer_url, :downloaded_source, :replaced_post, :file + attr_accessor :as_pending, :replaced_post, :file belongs_to :uploader, :class_name => "User" belongs_to :post, optional: true @@ -63,6 +62,7 @@ class Upload < ApplicationRecord validates :file_ext, format: { with: /jpg|gif|png|swf|webm|mp4|zip/ }, allow_nil: true validates_with Validator serialize :context, JSON + scope :preprocessed, -> { where(status: "preprocessed") } def initialize_attributes self.uploader_id = CurrentUser.id diff --git a/app/views/uploads/_image.html.erb b/app/views/uploads/_image.html.erb index 42a1e6994..e63f6bbc3 100644 --- a/app/views/uploads/_image.html.erb +++ b/app/views/uploads/_image.html.erb @@ -1,8 +1,8 @@ <% if params[:url] %> - <% if ImageProxy.needs_proxy?(@normalized_url) %> - <%= image_tag(image_proxy_uploads_path(:url => @normalized_url), :title => "Preview", :id => "image") %> + <% if ImageProxy.needs_proxy?(@source.image_url) %> + <%= image_tag(image_proxy_uploads_path(:url => @source.image_url), :title => "Preview", :id => "image") %> <% else %> - <%= image_tag(@normalized_url, :title => "Preview", :id => "image") %> + <%= image_tag(@source.image_url, :title => "Preview", :id => "image") %> <% end %>
    diff --git a/app/views/uploads/index.html.erb b/app/views/uploads/index.html.erb index 16c59d2e0..dc526624b 100644 --- a/app/views/uploads/index.html.erb +++ b/app/views/uploads/index.html.erb @@ -47,11 +47,10 @@
    - <% if upload.alt_source.present? %> + <% if upload.referer_url.present? %> - Alternate Source - <%= link_to_if (upload.alt_source =~ %r!\Ahttps?://!i), (upload.alt_source.presence.try(:truncate, 50) || content_tag(:em, "none")), upload.source %> - <%= link_to "»", uploads_path(search: params[:search].merge(source_matches: upload.alt_source)) %> + Referer + <%= URI.parse(upload.referer_url).host rescue nil %>
    <% end %> diff --git a/app/views/uploads/new.html.erb b/app/views/uploads/new.html.erb index 5ad8ed55e..881640df2 100644 --- a/app/views/uploads/new.html.erb +++ b/app/views/uploads/new.html.erb @@ -20,9 +20,7 @@ <%= form_for(@upload, :html => {:multipart => true, :class => "simple_form", :id => "form"}) do |f| %> <%= hidden_field_tag :url, params[:url] %> <%= hidden_field_tag :ref, params[:ref] %> - <%= hidden_field_tag :normalized_url, @normalized_url %> <%= f.hidden_field :md5_confirmation %> - <%= f.hidden_field :referer_url, :value => @source.try(:referer_url) %> <% if CurrentUser.can_upload_free? %>
    diff --git a/config/docker/compose.yml b/config/docker/compose.yml index 6bd23d7f0..1ca8228fa 100644 --- a/config/docker/compose.yml +++ b/config/docker/compose.yml @@ -49,6 +49,7 @@ services: - DANBOORU_NICO_SEIGA_LOGIN - DANBOORU_NICO_SEIGA_PASSWORD - DANBOORU_PERSIST_PIXIV_SESSION + - DANBOORU_TUMBLR_CONSUMER_KEY - CIRCLE_NODE_TOTAL - CIRCLE_NODE_INDEX - CIRCLE_BUILD_IMAGE diff --git a/db/migrate/20180816230604_rename_alt_source_on_uploads.rb b/db/migrate/20180816230604_rename_alt_source_on_uploads.rb new file mode 100644 index 000000000..fb7971bb6 --- /dev/null +++ b/db/migrate/20180816230604_rename_alt_source_on_uploads.rb @@ -0,0 +1,5 @@ +class RenameAltSourceOnUploads < ActiveRecord::Migration[5.2] + def change + rename_column :uploads, :alt_source, :referer_url + end +end diff --git a/db/structure.sql b/db/structure.sql index de7a979c7..01a7a9368 100644 --- a/db/structure.sql +++ b/db/structure.sql @@ -433,8 +433,8 @@ CREATE TABLE public.advertisement_hits ( id integer NOT NULL, advertisement_id integer NOT NULL, ip_addr inet NOT NULL, - created_at timestamp without time zone NOT NULL, - updated_at timestamp without time zone NOT NULL + created_at timestamp without time zone, + updated_at timestamp without time zone ); @@ -464,15 +464,15 @@ ALTER SEQUENCE public.advertisement_hits_id_seq OWNED BY public.advertisement_hi CREATE TABLE public.advertisements ( id integer NOT NULL, referral_url text NOT NULL, - ad_type character varying(255) NOT NULL, - status character varying(255) NOT NULL, + ad_type character varying NOT NULL, + status character varying NOT NULL, hit_count integer DEFAULT 0 NOT NULL, width integer NOT NULL, height integer NOT NULL, - file_name character varying(255) NOT NULL, + file_name character varying NOT NULL, is_work_safe boolean DEFAULT false NOT NULL, - created_at timestamp without time zone NOT NULL, - updated_at timestamp without time zone NOT NULL + created_at timestamp without time zone, + updated_at timestamp without time zone ); @@ -502,8 +502,8 @@ ALTER SEQUENCE public.advertisements_id_seq OWNED BY public.advertisements.id; CREATE TABLE public.amazon_backups ( id integer NOT NULL, last_id integer, - created_at timestamp without time zone NOT NULL, - updated_at timestamp without time zone NOT NULL + created_at timestamp without time zone, + updated_at timestamp without time zone ); @@ -564,7 +564,7 @@ ALTER SEQUENCE public.anti_voters_id_seq OWNED BY public.anti_voters.id; CREATE TABLE public.api_keys ( id integer NOT NULL, user_id integer NOT NULL, - key character varying(255) NOT NULL, + key character varying NOT NULL, created_at timestamp without time zone, updated_at timestamp without time zone ); @@ -612,8 +612,8 @@ CREATE TABLE public.artist_commentaries ( original_description text DEFAULT ''::text NOT NULL, translated_title text DEFAULT ''::text NOT NULL, translated_description text DEFAULT ''::text NOT NULL, - created_at timestamp without time zone NOT NULL, - updated_at timestamp without time zone NOT NULL + created_at timestamp without time zone, + updated_at timestamp without time zone ); @@ -649,8 +649,8 @@ CREATE TABLE public.artist_commentary_versions ( original_description text, translated_title text, translated_description text, - created_at timestamp without time zone NOT NULL, - updated_at timestamp without time zone NOT NULL + created_at timestamp without time zone, + updated_at timestamp without time zone ); @@ -682,8 +682,8 @@ CREATE TABLE public.artist_urls ( artist_id integer NOT NULL, url text NOT NULL, normalized_url text NOT NULL, - created_at timestamp without time zone NOT NULL, - updated_at timestamp without time zone NOT NULL, + created_at timestamp without time zone, + updated_at timestamp without time zone, is_active boolean DEFAULT true NOT NULL ); @@ -714,16 +714,16 @@ ALTER SEQUENCE public.artist_urls_id_seq OWNED BY public.artist_urls.id; CREATE TABLE public.artist_versions ( id integer NOT NULL, artist_id integer NOT NULL, - name character varying(255) NOT NULL, + name character varying NOT NULL, updater_id integer NOT NULL, updater_ip_addr inet NOT NULL, is_active boolean DEFAULT true NOT NULL, other_names text, - group_name character varying(255), + group_name character varying, url_string text, is_banned boolean DEFAULT false NOT NULL, - created_at timestamp without time zone NOT NULL, - updated_at timestamp without time zone NOT NULL + created_at timestamp without time zone, + updated_at timestamp without time zone ); @@ -752,15 +752,15 @@ ALTER SEQUENCE public.artist_versions_id_seq OWNED BY public.artist_versions.id; CREATE TABLE public.artists ( id integer NOT NULL, - name character varying(255) NOT NULL, + name character varying NOT NULL, creator_id integer NOT NULL, is_active boolean DEFAULT true NOT NULL, is_banned boolean DEFAULT false NOT NULL, other_names text, other_names_index tsvector, - group_name character varying(255), - created_at timestamp without time zone NOT NULL, - updated_at timestamp without time zone NOT NULL + group_name character varying, + created_at timestamp without time zone, + updated_at timestamp without time zone ); @@ -793,8 +793,8 @@ CREATE TABLE public.bans ( reason text NOT NULL, banner_id integer NOT NULL, expires_at timestamp without time zone NOT NULL, - created_at timestamp without time zone NOT NULL, - updated_at timestamp without time zone NOT NULL + created_at timestamp without time zone, + updated_at timestamp without time zone ); @@ -826,7 +826,7 @@ CREATE TABLE public.bulk_update_requests ( user_id integer NOT NULL, forum_topic_id integer, script text NOT NULL, - status character varying(255) DEFAULT 'pending'::character varying NOT NULL, + status character varying DEFAULT 'pending'::character varying NOT NULL, created_at timestamp without time zone, updated_at timestamp without time zone, approver_id integer, @@ -863,8 +863,8 @@ CREATE TABLE public.comment_votes ( comment_id integer NOT NULL, user_id integer NOT NULL, score integer NOT NULL, - created_at timestamp without time zone NOT NULL, - updated_at timestamp without time zone NOT NULL + created_at timestamp without time zone, + updated_at timestamp without time zone ); @@ -899,8 +899,8 @@ CREATE TABLE public.comments ( ip_addr inet NOT NULL, body_index tsvector NOT NULL, score integer DEFAULT 0 NOT NULL, - created_at timestamp without time zone NOT NULL, - updated_at timestamp without time zone NOT NULL, + created_at timestamp without time zone, + updated_at timestamp without time zone, updater_id integer, updater_ip_addr inet, do_not_bump_post boolean DEFAULT false NOT NULL, @@ -941,10 +941,10 @@ CREATE TABLE public.delayed_jobs ( run_at timestamp without time zone, locked_at timestamp without time zone, failed_at timestamp without time zone, - locked_by character varying(255), - created_at timestamp without time zone NOT NULL, - updated_at timestamp without time zone NOT NULL, - queue character varying(255) + locked_by character varying, + created_at timestamp without time zone, + updated_at timestamp without time zone, + queue character varying ); @@ -1013,8 +1013,8 @@ CREATE TABLE public.dmails ( message_index tsvector NOT NULL, is_read boolean DEFAULT false NOT NULL, is_deleted boolean DEFAULT false NOT NULL, - created_at timestamp without time zone NOT NULL, - updated_at timestamp without time zone NOT NULL, + created_at timestamp without time zone, + updated_at timestamp without time zone, creator_ip_addr inet NOT NULL, is_spam boolean DEFAULT false ); @@ -2149,8 +2149,8 @@ CREATE TABLE public.forum_posts ( body text NOT NULL, text_index tsvector NOT NULL, is_deleted boolean DEFAULT false NOT NULL, - created_at timestamp without time zone NOT NULL, - updated_at timestamp without time zone NOT NULL + created_at timestamp without time zone, + updated_at timestamp without time zone ); @@ -2182,7 +2182,7 @@ CREATE TABLE public.forum_subscriptions ( user_id integer, forum_topic_id integer, last_read_at timestamp without time zone, - delete_key character varying(255) + delete_key character varying ); @@ -2246,14 +2246,14 @@ CREATE TABLE public.forum_topics ( id integer NOT NULL, creator_id integer NOT NULL, updater_id integer NOT NULL, - title character varying(255) NOT NULL, + title character varying NOT NULL, response_count integer DEFAULT 0 NOT NULL, is_sticky boolean DEFAULT false NOT NULL, is_locked boolean DEFAULT false NOT NULL, is_deleted boolean DEFAULT false NOT NULL, text_index tsvector NOT NULL, - created_at timestamp without time zone NOT NULL, - updated_at timestamp without time zone NOT NULL, + created_at timestamp without time zone, + updated_at timestamp without time zone, category_id integer DEFAULT 0 NOT NULL, min_level integer DEFAULT 0 NOT NULL ); @@ -2287,8 +2287,8 @@ CREATE TABLE public.ip_bans ( creator_id integer NOT NULL, ip_addr inet NOT NULL, reason text NOT NULL, - created_at timestamp without time zone NOT NULL, - updated_at timestamp without time zone NOT NULL + created_at timestamp without time zone, + updated_at timestamp without time zone ); @@ -2320,9 +2320,9 @@ CREATE TABLE public.janitor_trials ( creator_id integer NOT NULL, user_id integer NOT NULL, original_level integer, - created_at timestamp without time zone NOT NULL, - updated_at timestamp without time zone NOT NULL, - status character varying(255) DEFAULT 'active'::character varying NOT NULL + created_at timestamp without time zone, + updated_at timestamp without time zone, + status character varying DEFAULT 'active'::character varying NOT NULL ); @@ -2353,8 +2353,8 @@ CREATE TABLE public.mod_actions ( id integer NOT NULL, creator_id integer NOT NULL, description text NOT NULL, - created_at timestamp without time zone NOT NULL, - updated_at timestamp without time zone NOT NULL, + created_at timestamp without time zone, + updated_at timestamp without time zone, category integer ); @@ -2387,8 +2387,8 @@ CREATE TABLE public.news_updates ( message text NOT NULL, creator_id integer NOT NULL, updater_id integer NOT NULL, - created_at timestamp without time zone NOT NULL, - updated_at timestamp without time zone NOT NULL + created_at timestamp without time zone, + updated_at timestamp without time zone ); @@ -2427,8 +2427,8 @@ CREATE TABLE public.note_versions ( height integer NOT NULL, is_active boolean DEFAULT true NOT NULL, body text NOT NULL, - created_at timestamp without time zone NOT NULL, - updated_at timestamp without time zone NOT NULL, + created_at timestamp without time zone, + updated_at timestamp without time zone, version integer DEFAULT 0 NOT NULL ); @@ -2467,8 +2467,8 @@ CREATE TABLE public.notes ( is_active boolean DEFAULT true NOT NULL, body text NOT NULL, body_index tsvector NOT NULL, - created_at timestamp without time zone NOT NULL, - updated_at timestamp without time zone NOT NULL, + created_at timestamp without time zone, + updated_at timestamp without time zone, version integer DEFAULT 0 NOT NULL ); @@ -2500,7 +2500,7 @@ CREATE TABLE public.pixiv_ugoira_frame_data ( id integer NOT NULL, post_id integer, data text NOT NULL, - content_type character varying(255) NOT NULL + content_type character varying NOT NULL ); @@ -2529,16 +2529,16 @@ ALTER SEQUENCE public.pixiv_ugoira_frame_data_id_seq OWNED BY public.pixiv_ugoir CREATE TABLE public.pools ( id integer NOT NULL, - name character varying(255), + name character varying, creator_id integer NOT NULL, description text, is_active boolean DEFAULT true NOT NULL, post_ids text DEFAULT ''::text NOT NULL, post_count integer DEFAULT 0 NOT NULL, is_deleted boolean DEFAULT false NOT NULL, - created_at timestamp without time zone NOT NULL, - updated_at timestamp without time zone NOT NULL, - category character varying(255) DEFAULT 'series'::character varying NOT NULL + created_at timestamp without time zone, + updated_at timestamp without time zone, + category character varying DEFAULT 'series'::character varying NOT NULL ); @@ -2571,8 +2571,8 @@ CREATE TABLE public.post_appeals ( creator_id integer NOT NULL, creator_ip_addr inet, reason text, - created_at timestamp without time zone NOT NULL, - updated_at timestamp without time zone NOT NULL + created_at timestamp without time zone, + updated_at timestamp without time zone ); @@ -2635,9 +2635,9 @@ CREATE TABLE public.post_disapprovals ( id integer NOT NULL, user_id integer NOT NULL, post_id integer NOT NULL, - created_at timestamp without time zone NOT NULL, - updated_at timestamp without time zone NOT NULL, - reason character varying(255) DEFAULT 'legacy'::character varying, + created_at timestamp without time zone, + updated_at timestamp without time zone, + reason character varying DEFAULT 'legacy'::character varying, message text ); @@ -2672,8 +2672,8 @@ CREATE TABLE public.post_flags ( creator_ip_addr inet NOT NULL, reason text, is_resolved boolean DEFAULT false NOT NULL, - created_at timestamp without time zone NOT NULL, - updated_at timestamp without time zone NOT NULL + created_at timestamp without time zone, + updated_at timestamp without time zone ); @@ -2758,8 +2758,8 @@ CREATE TABLE public.post_votes ( post_id integer NOT NULL, user_id integer NOT NULL, score integer NOT NULL, - created_at timestamp without time zone NOT NULL, - updated_at timestamp without time zone NOT NULL + created_at timestamp without time zone, + updated_at timestamp without time zone ); @@ -2788,13 +2788,13 @@ ALTER SEQUENCE public.post_votes_id_seq OWNED BY public.post_votes.id; CREATE TABLE public.posts ( id integer NOT NULL, - created_at timestamp without time zone NOT NULL, - updated_at timestamp without time zone NOT NULL, + created_at timestamp without time zone, + updated_at timestamp without time zone, up_score integer DEFAULT 0 NOT NULL, down_score integer DEFAULT 0 NOT NULL, score integer DEFAULT 0 NOT NULL, - source character varying(255) DEFAULT ''::character varying NOT NULL, - md5 character varying(255) NOT NULL, + source character varying DEFAULT ''::character varying NOT NULL, + md5 character varying NOT NULL, rating character(1) DEFAULT 'q'::bpchar NOT NULL, is_note_locked boolean DEFAULT false NOT NULL, is_rating_locked boolean DEFAULT false NOT NULL, @@ -2817,7 +2817,7 @@ CREATE TABLE public.posts ( tag_count_artist integer DEFAULT 0 NOT NULL, tag_count_character integer DEFAULT 0 NOT NULL, tag_count_copyright integer DEFAULT 0 NOT NULL, - file_ext character varying(255) NOT NULL, + file_ext character varying NOT NULL, file_size integer NOT NULL, image_width integer NOT NULL, image_height integer NOT NULL, @@ -2890,7 +2890,7 @@ ALTER SEQUENCE public.saved_searches_id_seq OWNED BY public.saved_searches.id; -- CREATE TABLE public.schema_migrations ( - version character varying(255) NOT NULL + version character varying NOT NULL ); @@ -2931,14 +2931,14 @@ ALTER SEQUENCE public.super_voters_id_seq OWNED BY public.super_voters.id; CREATE TABLE public.tag_aliases ( id integer NOT NULL, - antecedent_name character varying(255) NOT NULL, - consequent_name character varying(255) NOT NULL, + antecedent_name character varying NOT NULL, + consequent_name character varying NOT NULL, creator_id integer NOT NULL, creator_ip_addr inet NOT NULL, forum_topic_id integer, status text DEFAULT 'pending'::text NOT NULL, - created_at timestamp without time zone NOT NULL, - updated_at timestamp without time zone NOT NULL, + created_at timestamp without time zone, + updated_at timestamp without time zone, post_count integer DEFAULT 0 NOT NULL, approver_id integer, forum_post_id integer @@ -2970,15 +2970,15 @@ ALTER SEQUENCE public.tag_aliases_id_seq OWNED BY public.tag_aliases.id; CREATE TABLE public.tag_implications ( id integer NOT NULL, - antecedent_name character varying(255) NOT NULL, - consequent_name character varying(255) NOT NULL, + antecedent_name character varying NOT NULL, + consequent_name character varying NOT NULL, descendant_names text NOT NULL, creator_id integer NOT NULL, creator_ip_addr inet NOT NULL, forum_topic_id integer, status text DEFAULT 'pending'::text NOT NULL, - created_at timestamp without time zone NOT NULL, - updated_at timestamp without time zone NOT NULL, + created_at timestamp without time zone, + updated_at timestamp without time zone, approver_id integer, forum_post_id integer ); @@ -3010,14 +3010,14 @@ ALTER SEQUENCE public.tag_implications_id_seq OWNED BY public.tag_implications.i CREATE TABLE public.tag_subscriptions ( id integer NOT NULL, creator_id integer NOT NULL, - name character varying(255) NOT NULL, + name character varying NOT NULL, tag_query text NOT NULL, post_ids text NOT NULL, is_public boolean DEFAULT true NOT NULL, last_accessed_at timestamp without time zone, is_opted_in boolean DEFAULT false NOT NULL, - created_at timestamp without time zone NOT NULL, - updated_at timestamp without time zone NOT NULL + created_at timestamp without time zone, + updated_at timestamp without time zone ); @@ -3046,7 +3046,7 @@ ALTER SEQUENCE public.tag_subscriptions_id_seq OWNED BY public.tag_subscriptions CREATE TABLE public.tags ( id integer NOT NULL, - name character varying(255) NOT NULL, + name character varying NOT NULL, post_count integer DEFAULT 0 NOT NULL, category integer DEFAULT 0 NOT NULL, related_tags text, @@ -3094,8 +3094,8 @@ CREATE UNLOGGED TABLE public.token_buckets ( CREATE TABLE public.uploads ( id integer NOT NULL, source text, - file_path character varying(255), - content_type character varying(255), + file_path character varying, + content_type character varying, rating character(1) NOT NULL, uploader_id integer NOT NULL, uploader_ip_addr inet NOT NULL, @@ -3103,9 +3103,9 @@ CREATE TABLE public.uploads ( status text DEFAULT 'pending'::text NOT NULL, backtrace text, post_id integer, - md5_confirmation character varying(255), - created_at timestamp without time zone NOT NULL, - updated_at timestamp without time zone NOT NULL, + md5_confirmation character varying, + created_at timestamp without time zone, + updated_at timestamp without time zone, server text, parent_id integer, md5 character varying, @@ -3117,7 +3117,7 @@ CREATE TABLE public.uploads ( artist_commentary_title text, include_artist_commentary boolean, context text, - alt_source text + referer_url text ); @@ -3148,10 +3148,10 @@ CREATE TABLE public.user_feedback ( id integer NOT NULL, user_id integer NOT NULL, creator_id integer NOT NULL, - category character varying(255) NOT NULL, + category character varying NOT NULL, body text NOT NULL, - created_at timestamp without time zone NOT NULL, - updated_at timestamp without time zone NOT NULL + created_at timestamp without time zone, + updated_at timestamp without time zone ); @@ -3180,15 +3180,15 @@ ALTER SEQUENCE public.user_feedback_id_seq OWNED BY public.user_feedback.id; CREATE TABLE public.user_name_change_requests ( id integer NOT NULL, - status character varying(255) DEFAULT 'pending'::character varying NOT NULL, + status character varying DEFAULT 'pending'::character varying NOT NULL, user_id integer NOT NULL, approver_id integer, - original_name character varying(255), - desired_name character varying(255), + original_name character varying, + desired_name character varying, change_reason text, rejection_reason text, - created_at timestamp without time zone NOT NULL, - updated_at timestamp without time zone NOT NULL + created_at timestamp without time zone, + updated_at timestamp without time zone ); @@ -3217,10 +3217,10 @@ ALTER SEQUENCE public.user_name_change_requests_id_seq OWNED BY public.user_name CREATE TABLE public.user_password_reset_nonces ( id integer NOT NULL, - key character varying(255) NOT NULL, - email character varying(255) NOT NULL, - created_at timestamp without time zone NOT NULL, - updated_at timestamp without time zone NOT NULL + key character varying NOT NULL, + email character varying NOT NULL, + created_at timestamp without time zone, + updated_at timestamp without time zone ); @@ -3249,12 +3249,12 @@ ALTER SEQUENCE public.user_password_reset_nonces_id_seq OWNED BY public.user_pas CREATE TABLE public.users ( id integer NOT NULL, - created_at timestamp without time zone NOT NULL, - updated_at timestamp without time zone NOT NULL, - name character varying(255) NOT NULL, - password_hash character varying(255) NOT NULL, - email character varying(255), - email_verification_key character varying(255), + created_at timestamp without time zone, + updated_at timestamp without time zone, + name character varying NOT NULL, + password_hash character varying NOT NULL, + email character varying, + email_verification_key character varying, inviter_id integer, level integer DEFAULT 0 NOT NULL, base_upload_limit integer DEFAULT 10 NOT NULL, @@ -3266,13 +3266,13 @@ CREATE TABLE public.users ( note_update_count integer DEFAULT 0 NOT NULL, favorite_count integer DEFAULT 0 NOT NULL, comment_threshold integer DEFAULT '-1'::integer NOT NULL, - default_image_size character varying(255) DEFAULT 'large'::character varying NOT NULL, + default_image_size character varying DEFAULT 'large'::character varying NOT NULL, favorite_tags text, blacklisted_tags text DEFAULT 'spoilers guro scat furry -rating:s'::text, - time_zone character varying(255) DEFAULT 'Eastern Time (US & Canada)'::character varying NOT NULL, + time_zone character varying DEFAULT 'Eastern Time (US & Canada)'::character varying NOT NULL, bcrypt_password_hash text, per_page integer DEFAULT 20 NOT NULL, custom_style text, @@ -3310,11 +3310,11 @@ CREATE TABLE public.wiki_page_versions ( wiki_page_id integer NOT NULL, updater_id integer NOT NULL, updater_ip_addr inet NOT NULL, - title character varying(255) NOT NULL, + title character varying NOT NULL, body text NOT NULL, is_locked boolean NOT NULL, - created_at timestamp without time zone NOT NULL, - updated_at timestamp without time zone NOT NULL, + created_at timestamp without time zone, + updated_at timestamp without time zone, other_names text, is_deleted boolean DEFAULT false NOT NULL ); @@ -3346,12 +3346,12 @@ ALTER SEQUENCE public.wiki_page_versions_id_seq OWNED BY public.wiki_page_versio CREATE TABLE public.wiki_pages ( id integer NOT NULL, creator_id integer NOT NULL, - title character varying(255) NOT NULL, + title character varying NOT NULL, body text NOT NULL, body_index tsvector NOT NULL, is_locked boolean DEFAULT false NOT NULL, - created_at timestamp without time zone NOT NULL, - updated_at timestamp without time zone NOT NULL, + created_at timestamp without time zone, + updated_at timestamp without time zone, updater_id integer, other_names text, other_names_index tsvector, @@ -4770,6 +4770,14 @@ ALTER TABLE ONLY public.saved_searches ADD CONSTRAINT saved_searches_pkey PRIMARY KEY (id); +-- +-- Name: schema_migrations schema_migrations_pkey; Type: CONSTRAINT; Schema: public; Owner: - +-- + +ALTER TABLE ONLY public.schema_migrations + ADD CONSTRAINT schema_migrations_pkey PRIMARY KEY (version); + + -- -- Name: super_voters super_voters_pkey; Type: CONSTRAINT; Schema: public; Owner: - -- @@ -6994,6 +7002,13 @@ CREATE INDEX index_posts_on_parent_id ON public.posts USING btree (parent_id); CREATE INDEX index_posts_on_pixiv_id ON public.posts USING btree (pixiv_id) WHERE (pixiv_id IS NOT NULL); +-- +-- Name: index_posts_on_source; Type: INDEX; Schema: public; Owner: - +-- + +CREATE INDEX index_posts_on_source ON public.posts USING btree (lower((source)::text)); + + -- -- Name: index_posts_on_source_pattern; Type: INDEX; Schema: public; Owner: - -- @@ -7015,6 +7030,13 @@ CREATE INDEX index_posts_on_tags_index ON public.posts USING gin (tag_index); CREATE INDEX index_posts_on_uploader_id ON public.posts USING btree (uploader_id); +-- +-- Name: index_posts_on_uploader_ip_addr; Type: INDEX; Schema: public; Owner: - +-- + +CREATE INDEX index_posts_on_uploader_ip_addr ON public.posts USING btree (uploader_ip_addr); + + -- -- Name: index_saved_searches_on_labels; Type: INDEX; Schema: public; Owner: - -- @@ -7121,10 +7143,10 @@ CREATE UNIQUE INDEX index_token_buckets_on_user_id ON public.token_buckets USING -- --- Name: index_uploads_on_alt_source; Type: INDEX; Schema: public; Owner: - +-- Name: index_uploads_on_referer_url; Type: INDEX; Schema: public; Owner: - -- -CREATE INDEX index_uploads_on_alt_source ON public.uploads USING btree (alt_source); +CREATE INDEX index_uploads_on_referer_url ON public.uploads USING btree (referer_url); -- @@ -7267,13 +7289,6 @@ CREATE INDEX index_wiki_pages_on_title_pattern ON public.wiki_pages USING btree CREATE INDEX index_wiki_pages_on_updated_at ON public.wiki_pages USING btree (updated_at); --- --- Name: unique_schema_migrations; Type: INDEX; Schema: public; Owner: - --- - -CREATE UNIQUE INDEX unique_schema_migrations ON public.schema_migrations USING btree (version); - - -- -- Name: favorites insert_favorites_trigger; Type: TRIGGER; Schema: public; Owner: - -- @@ -7502,13 +7517,13 @@ INSERT INTO "schema_migrations" (version) VALUES ('20171230220225'), ('20180113211343'), ('20180116001101'), -('20180310070233'), ('20180403231351'), ('20180413224239'), ('20180425194016'), ('20180516222413'), ('20180517190048'), ('20180518175154'), -('20180804203201'); +('20180804203201'), +('20180816230604'); diff --git a/lib/tasks/images.rake b/lib/tasks/images.rake index 7f7c2714d..aeddf5f40 100644 --- a/lib/tasks/images.rake +++ b/lib/tasks/images.rake @@ -87,7 +87,7 @@ namespace :images do post = Post.find(post_id) post.source =~ /(\d{5,})/ if illust_id = $1 - response = PixivApiClient.new.works(illust_id) + response = PixivApiClient.new.work(illust_id) upload = Upload.new upload.source = response.pages.first upload.file_ext = post.file_ext diff --git a/test/functional/uploads_controller_test.rb b/test/functional/uploads_controller_test.rb index fdfcf752a..cda38df26 100644 --- a/test/functional/uploads_controller_test.rb +++ b/test/functional/uploads_controller_test.rb @@ -68,6 +68,20 @@ class UploadsControllerTest < ActionDispatch::IntegrationTest end end + context "for a direct link twitter post" do + setup do + @ref = "https://twitter.com/onsen_musume_jp/status/865534101918330881" + @source = "https://pbs.twimg.com/media/DAL-ntWV0AEbhes.jpg:orig" + end + + should "trigger the preprocessor" do + assert_difference(-> { Upload.preprocessed.count }, 1) do + get_auth new_upload_path, @user, params: {:url => @source, :ref => @ref} + Delayed::Worker.new.work_off + end + end + end + context "for a twitter post" do setup do @source = "https://twitter.com/frappuccino/status/566030116182949888" @@ -89,6 +103,20 @@ class UploadsControllerTest < ActionDispatch::IntegrationTest end end + context "for a pixiv post" do + setup do + @ref = "http://www.pixiv.net/member_illust.php?mode=medium&illust_id=49270482" + @source = "https://i.pximg.net/img-original/img/2015/03/14/17/53/32/49270482_p0.jpg" + end + + should "trigger the preprocessor" do + assert_difference(-> { Upload.preprocessed.count }, 1) do + get_auth new_upload_path, @user, params: {:url => @source, :ref => @ref} + Delayed::Worker.new.work_off + end + end + end + context "for a post that has already been uploaded" do setup do as_user do @@ -149,6 +177,48 @@ class UploadsControllerTest < ActionDispatch::IntegrationTest end context "create action" do + context "when a preprocessed upload already exists" do + context "for twitter" do + setup do + as_user do + @ref = "https://twitter.com/onsen_musume_jp/status/865534101918330881" + @source = "https://pbs.twimg.com/media/DAL-ntWV0AEbhes.jpg:orig" + @upload = create(:upload, status: "preprocessed", source: @source, referer_url: @ref, image_width: 0, image_height: 0, file_size: 0, md5: "something", file_ext: "jpg") + end + end + + should "update the predecessor" do + assert_difference(->{ Post.count }, 1) do + assert_difference(->{ Upload.count }, 0) do + post_auth uploads_path, @user, params: {:upload => {:tag_string => "aaa", :rating => "q", :source => @source, :referer_url => @ref}} + end + end + post = Post.last + assert_match(/aaa/, post.tag_string) + end + end + + context "for pixiv" do + setup do + @ref = "http://www.pixiv.net/member_illust.php?mode=medium&illust_id=49270482" + @source = "https://i.pximg.net/img-original/img/2015/03/14/17/53/32/49270482_p0.jpg" + as_user do + @upload = create(:upload, status: "preprocessed", source: @source, referer_url: @ref, image_width: 0, image_height: 0, file_size: 0, md5: "something", file_ext: "jpg") + end + end + + should "update the predecessor" do + assert_difference(->{ Post.count }, 1) do + assert_difference(->{ Upload.count }, 0) do + post_auth uploads_path, @user, params: {:upload => {:tag_string => "aaa", :rating => "q", :source => @source, :referer_url => @ref}} + end + end + post = Post.last + assert_match(/aaa/, post.tag_string) + end + end + end + should "create a new upload" do assert_difference("Upload.count", 1) do file = Rack::Test::UploadedFile.new("#{Rails.root}/test/files/test.jpg", "image/jpeg") diff --git a/test/models/upload_service_test.rb b/test/models/upload_service_test.rb index 5f0249a87..3acf5b91c 100644 --- a/test/models/upload_service_test.rb +++ b/test/models/upload_service_test.rb @@ -17,31 +17,56 @@ class UploadServiceTest < ActiveSupport::TestCase context "::Utils" do subject { UploadService::Utils } - context "#download_from_source" do - setup do - @jpeg = "https://upload.wikimedia.org/wikipedia/commons/c/c5/Moraine_Lake_17092005.jpg" - @ugoira = "https://i.pximg.net/img-zip-ugoira/img/2017/04/04/08/57/38/62247364_ugoira1920x1080.zip" - end - - should "work on a jpeg" do - file = subject.download_from_source(@jpeg) do |context| - assert_not_nil(context[:downloaded_source]) - assert_not_nil(context[:source]) + context "#download_for_upload" do + context "for a non-source site" do + setup do + @source = "https://upload.wikimedia.org/wikipedia/commons/c/c5/Moraine_Lake_17092005.jpg" + @upload = Upload.new + @upload.source = @source end - assert_operator(File.size(file.path), :>, 0) - file.close + should "work on a jpeg" do + file = subject.download_for_upload(@upload) + + assert_operator(File.size(file.path), :>, 0) + + file.close + end end - should "work on an ugoira url" do - file = subject.download_from_source(@ugoira, referer_url: "https://www.pixiv.net") do |context| - assert_not_nil(context[:downloaded_source]) - assert_not_nil(context[:source]) - assert_not_nil(context[:ugoira]) + context "for a pixiv" do + setup do + @source = "http://www.pixiv.net/member_illust.php?mode=medium&illust_id=62247350" + @upload = Upload.new + @upload.source = @source end - assert_operator(File.size(file.path), :>, 0) - file.close + should "work on an ugoira url" do + file = subject.download_for_upload(@upload) + + assert_operator(File.size(file.path), :>, 0) + + file.close + end + end + + context "for a pixiv ugoira" do + setup do + @source = "https://i.pximg.net/img-zip-ugoira/img/2017/04/04/08/57/38/62247364_ugoira1920x1080.zip" + @referer = "http://www.pixiv.net/member_illust.php?mode=medium&illust_id=62247364" + @upload = Upload.new + @upload.source = @source + @upload.referer_url = @referer + end + + should "work on an ugoira url" do + file = subject.download_for_upload(@upload) + + assert_not_nil(@upload.context["ugoira"]) + assert_operator(File.size(file.path), :>, 0) + + file.close + end end end @@ -343,9 +368,6 @@ class UploadServiceTest < ActiveSupport::TestCase FactoryBot.create(:user) end CurrentUser.ip_addr = "127.0.0.1" - @jpeg = "https://raikou1.donmai.us/d3/4e/d34e4cf0a437a5d65f8e82b7bcd02606.jpg" - @ugoira = "http://www.pixiv.net/member_illust.php?mode=medium&illust_id=62247364" - @video = "https://www.sample-videos.com/video/mp4/720/big_buck_bunny_720p_1mb.mp4" end teardown do @@ -356,82 +378,100 @@ class UploadServiceTest < ActiveSupport::TestCase context "for twitter" do setup do @source = "https://pbs.twimg.com/media/B4HSEP5CUAA4xyu.png:large" - @norm_source = "https://pbs.twimg.com/media/B4HSEP5CUAA4xyu.png:orig" @ref = "https://twitter.com/nounproject/status/540944400767922176" end - should "record the correct source when a referer is given" do + should "download the file" do @service = subject.new(source: @source, referer_url: @ref) @upload = @service.start! - assert_equal(@ref, @upload.source) - end - - should "save the twimg url in alt_source" do - @service = subject.new(source: @source, referer_url: @ref) - @upload = @service.start! - assert_equal(@norm_source, @upload.alt_source) + assert_equal("preprocessed", @upload.status) + assert_equal(9800, @upload.file_size) + assert_equal("png", @upload.file_ext) + assert_equal("f5fe24f3a3a13885285f6627e04feec9", @upload.md5) + assert(File.exists?(Danbooru.config.storage_manager.file_path(@upload.md5, "png", :original))) + assert(File.exists?(Danbooru.config.storage_manager.file_path(@upload.md5, "png", :preview))) end end context "for pixiv" do setup do - @source = "https://www.pixiv.net/member_illust.php?mode=medium&illust_id=65981735" - @ref = "http://www.pixiv.net/member.php?id=696859" - @direct = "https://i.pximg.net/img-original/img/2017/11/21/05/12/37/65981735_p0.jpg" + @source = "https://i.pximg.net/img-original/img/2014/10/29/09/27/19/46785915_p0.jpg" + @ref = "https://www.pixiv.net/member_illust.php?mode=medium&illust_id=46785915" end - should "record the correct source" do + should "download the file" do @service = subject.new(source: @source, referer_url: @ref) @upload = @service.start! - assert_equal(@direct, @upload.source) - end + assert_equal("preprocessed", @upload.status) + assert_equal(294591, @upload.file_size) + assert_equal("jpg", @upload.file_ext) + assert_equal("3cb1ef624714c15dbb2d6e7b1d57faef", @upload.md5) + assert(File.exists?(Danbooru.config.storage_manager.file_path(@upload.md5, "jpg", :original))) + assert(File.exists?(Danbooru.config.storage_manager.file_path(@upload.md5, "jpg", :preview))) + end end - should "work for a jpeg" do - @service = subject.new(source: @jpeg) - @upload = @service.start! - assert_equal("preprocessed", @upload.status) - assert_not_nil(@upload.md5) - assert_equal("jpg", @upload.file_ext) - assert_operator(@upload.file_size, :>, 0) - assert_not_nil(@upload.source) - assert(File.exists?(Danbooru.config.storage_manager.file_path(@upload.md5, "jpg", :original))) - # this image is not large enough to generate a large file - #assert(File.exists?(Danbooru.config.storage_manager.file_path(@upload.md5, "jpg", :large))) - assert(File.exists?(Danbooru.config.storage_manager.file_path(@upload.md5, "jpg", :preview))) + context "for pixiv ugoira" do + setup do + @source = "http://www.pixiv.net/member_illust.php?mode=medium&illust_id=62247364" + end + + should "download the file" do + @service = subject.new(source: @source) + @upload = @service.start! + assert_equal("preprocessed", @upload.status) + assert_equal(2804, @upload.file_size) + assert_equal("zip", @upload.file_ext) + assert_equal("cad1da177ef309bf40a117c17b8eecf5", @upload.md5) + assert(File.exists?(Danbooru.config.storage_manager.file_path(@upload.md5, "zip", :original))) + assert(File.exists?(Danbooru.config.storage_manager.file_path(@upload.md5, "zip", :large))) + end end - should "work for an ugoira" do - @service = subject.new(source: @ugoira) - @upload = @service.start! - assert_equal("preprocessed", @upload.status) - assert_not_nil(@upload.md5) - assert_equal("zip", @upload.file_ext) - assert_operator(@upload.file_size, :>, 0) - assert_not_nil(@upload.source) - assert(File.exists?(Danbooru.config.storage_manager.file_path(@upload.md5, "zip", :original))) - assert(File.exists?(Danbooru.config.storage_manager.file_path(@upload.md5, "zip", :large))) + context "for null" do + setup do + @source = "https://raikou1.donmai.us/93/f4/93f4dd66ef1eb11a89e56d31f9adc8d0.jpg" + end + + should "download the file" do + @service = subject.new(source: @source) + @upload = @service.start! + assert_equal("preprocessed", @upload.status) + assert_equal(181309, @upload.file_size) + assert_equal("jpg", @upload.file_ext) + assert_equal("93f4dd66ef1eb11a89e56d31f9adc8d0", @upload.md5) + assert(File.exists?(Danbooru.config.storage_manager.file_path(@upload.md5, "jpg", :original))) + assert(File.exists?(Danbooru.config.storage_manager.file_path(@upload.md5, "jpg", :large))) + assert(File.exists?(Danbooru.config.storage_manager.file_path(@upload.md5, "jpg", :preview))) + end end - should "work for a video" do - @service = subject.new(source: @video) - @upload = @service.start! - assert_equal("preprocessed", @upload.status) - assert_not_nil(@upload.md5) - assert_equal("mp4", @upload.file_ext) - assert_operator(@upload.file_size, :>, 0) - assert_not_nil(@upload.source) - assert(File.exists?(Danbooru.config.storage_manager.file_path(@upload.md5, "mp4", :original))) - assert(File.exists?(Danbooru.config.storage_manager.file_path(@upload.md5, "mp4", :preview))) + context "for a video" do + setup do + @source = "https://www.sample-videos.com/video/mp4/720/big_buck_bunny_720p_1mb.mp4" + end + + should "work for a video" do + @service = subject.new(source: @source) + @upload = @service.start! + assert_equal("preprocessed", @upload.status) + assert_not_nil(@upload.md5) + assert_equal("mp4", @upload.file_ext) + assert_operator(@upload.file_size, :>, 0) + assert_not_nil(@upload.source) + assert(File.exists?(Danbooru.config.storage_manager.file_path(@upload.md5, "mp4", :original))) + assert(File.exists?(Danbooru.config.storage_manager.file_path(@upload.md5, "mp4", :preview))) + end end context "on timeout errors" do setup do + @source = "https://raikou1.donmai.us/93/f4/93f4dd66ef1eb11a89e56d31f9adc8d0.jpg" HTTParty.stubs(:get).raises(Net::ReadTimeout) end should "leave the upload in an error state" do - @service = subject.new(source: @video) + @service = subject.new(source: @source) @upload = @service.start! assert_match(/error:/, @upload.status) end @@ -445,41 +485,15 @@ class UploadServiceTest < ActiveSupport::TestCase FactoryBot.create(:user) end CurrentUser.ip_addr = "127.0.0.1" + @source = "https://twitter.com/nounproject/status/540944400767922176" end - context "for twitter" do - setup do - @source = "https://pbs.twimg.com/media/B4HSEP5CUAA4xyu.png:large" - @norm_source = "https://pbs.twimg.com/media/B4HSEP5CUAA4xyu.png:orig" - @ref = "https://twitter.com/nounproject/status/540944400767922176" - end - - should "record the correct source when a referer is given" do - @service = subject.new(source: @source, referer_url: @ref) - @upload = @service.start! - @service = subject.new(source: @source) - @service.finish! - @upload.reload - - assert_equal(@ref, @upload.source) - end - end - - context "for pixiv" do - setup do - @source = "https://www.pixiv.net/member_illust.php?mode=medium&illust_id=65981735" - @ref = "http://www.pixiv.net/member.php?id=696859" - @direct = "https://i.pximg.net/img-original/img/2017/11/21/05/12/37/65981735_p0.jpg" - end - - should "record the correct source" do - @service = subject.new(source: @source, referer_url: @ref) - @upload = @service.start! - @service = subject.new(source: @source) - @service.finish! - @upload.reload - assert_equal(@direct, @upload.source) - end + should "overwrite the attributes" do + @service = subject.new(source: @source, rating: 'e') + @upload = @service.start! + @service.finish! + @upload.reload + assert_equal('e', @upload.rating) end end end @@ -637,7 +651,7 @@ class UploadServiceTest < ActiveSupport::TestCase image_url = "https://pbs.twimg.com/media/B4HSEP5CUAA4xyu.png:orig" as_user { @post.replace!(replacement_url: replacement_url) } - assert_equal(image_url, @post.replacements.last.replacement_url) + assert_equal(replacement_url, @post.replacements.last.replacement_url) end end @@ -1027,7 +1041,33 @@ class UploadServiceTest < ActiveSupport::TestCase CurrentUser.ip_addr = nil end - context "for an ugoira" do + context "for a pixiv" do + setup do + @source = "https://i.pximg.net/img-original/img/2017/11/21/05/12/37/65981735_p0.jpg" + @ref = "https://www.pixiv.net/member_illust.php?mode=medium&illust_id=65981735" + @upload = FactoryBot.create(:jpg_upload, file_size: 1000, md5: "12345", file_ext: "jpg", image_width: 100, image_height: 100, source: @source, referer_url: @ref) + end + + should "record the canonical source" do + post = subject.new({}).create_post_from_upload(@upload) + assert_equal(@source, post.source) + end + end + + context "for a twitter" do + setup do + @source = "https://pbs.twimg.com/media/C1kt72yVEAEGpOv.jpg:large" + @ref = "https://twitter.com/aranobu/status/817736083567820800" + @upload = FactoryBot.create(:jpg_upload, file_size: 1000, md5: "12345", file_ext: "jpg", image_width: 100, image_height: 100, source: @source, referer_url: @ref) + end + + should "record the canonical source" do + post = subject.new({}).create_post_from_upload(@upload) + assert_equal(@ref, post.source) + end + end + + context "for a pixiv ugoira" do setup do @upload = FactoryBot.create(:ugoira_upload, file_size: 1000, md5: "12345", file_ext: "jpg", image_width: 100, image_height: 100, context: UGOIRA_CONTEXT) end diff --git a/test/test_helpers/download_test_helper.rb b/test/test_helpers/download_test_helper.rb index 95cd9e51d..172f8d101 100644 --- a/test/test_helpers/download_test_helper.rb +++ b/test/test_helpers/download_test_helper.rb @@ -1,23 +1,22 @@ require 'ptools' module DownloadTestHelper - def assert_downloaded(expected_filesize, source) - download = Downloads::File.new(source) - tempfile = download.download! + def assert_downloaded(expected_filesize, source, referer=nil) + download = Downloads::File.new(source, referer) + tempfile, strategy = download.download! assert_equal(expected_filesize, tempfile.size, "Tested source URL: #{source}") rescue Net::OpenTimeout skip "Remote connection to #{source} failed" end - def assert_rewritten(expected_source, test_source) - download = Downloads::File.new(test_source) - - rewritten_source, _, _ = download.before_download(test_source, {}) + def assert_rewritten(expected_source, test_source, test_referer=nil) + strategy = Sources::Strategies.find(test_source, test_referer) + rewritten_source = strategy.image_url assert_match(expected_source, rewritten_source, "Tested source URL: #{test_source}") end - def assert_not_rewritten(source) - assert_rewritten(source, source) + def assert_not_rewritten(source, referer=nil) + assert_rewritten(source, source, referer) end def check_ffmpeg diff --git a/test/unit/artist_test.rb b/test/unit/artist_test.rb index f28ff2c59..a602e80c8 100644 --- a/test/unit/artist_test.rb +++ b/test/unit/artist_test.rb @@ -229,9 +229,7 @@ class ArtistTest < ActiveSupport::TestCase should "find the correct artist for page URLs" do assert_artist_found("artgerm", "http://www.deviantart.com/artgerm/art/Peachy-Princess-Ver-2-457220550") - assert_artist_found("trixia", "http://www.deviantart.com/trixdraws/art/My-Queen-426745289") - assert_artist_found("trixia", "http://www.deviantart.com/trixdraws/gallery/#/d722mrt") end should "find the correct artist for image URLs" do @@ -281,11 +279,6 @@ class ArtistTest < ActiveSupport::TestCase assert_artist_found("bkub", "http://www.pixiv.net/i/46239857") end - should "find nothing for malformed URLs" do - assert_artist_not_found("http://www.pixiv.net/member_illust.php?mode=medium&illust_id=herpderp") - assert_artist_not_found("http://www.pixiv.net/wharrgarbl") - end - should "find nothing for bad IDs" do assert_raises(PixivApiClient::BadIDError) do assert_artist_not_found("http://www.pixiv.net/member_illust.php?mode=medium&illust_id=32049358") diff --git a/test/unit/artist_url_test.rb b/test/unit/artist_url_test.rb index b6f42349a..437fa6900 100644 --- a/test/unit/artist_url_test.rb +++ b/test/unit/artist_url_test.rb @@ -45,6 +45,56 @@ class ArtistUrlTest < ActiveSupport::TestCase end end + context "artstation urls" do + setup do + @urls = [ + FactoryBot.create(:artist_url, url: "https://www.artstation.com/koyorin"), + FactoryBot.create(:artist_url, url: "https://www.artstation.com/artist/koyorin"), + FactoryBot.create(:artist_url, url: "https://koyorin.artstation.com"), + FactoryBot.create(:artist_url, url: "https://www.artstation.com/artwork/04XA4") + ] + end + + should "normalize" do + assert_equal("http://www.artstation.com/koyorin/", @urls[0].normalized_url) + assert_equal("http://www.artstation.com/koyorin/", @urls[1].normalized_url) + assert_equal("http://www.artstation.com/koyorin/", @urls[2].normalized_url) + assert_equal("http://www.artstation.com/jeyrain/", @urls[3].normalized_url) + end + end + + context "deviantart urls" do + setup do + @urls = [ + FactoryBot.create(:artist_url, url: "https://www.deviantart.com/aeror404/art/Holiday-Elincia-424551484"), + FactoryBot.create(:artist_url, url: "http://noizave.deviantart.com/art/test-post-please-ignore-685436408"), + FactoryBot.create(:artist_url, url: "https://www.deviantart.com/noizave") + ] + end + + should "normalize" do + assert_equal("http://www.deviantart.com/aeror404/", @urls[0].normalized_url) + assert_equal("http://www.deviantart.com/noizave/", @urls[1].normalized_url) + assert_equal("http://www.deviantart.com/noizave/", @urls[2].normalized_url) + end + end + + context "nicoseiga urls" do + setup do + @urls = [ + FactoryBot.create(:artist_url, url: "http://seiga.nicovideo.jp/user/illust/7017777"), + FactoryBot.create(:artist_url, url: "http://lohas.nicoseiga.jp/o/910aecf08e542285862954017f8a33a8c32a8aec/1433298801/4937663"), + FactoryBot.create(:artist_url, url: "http://seiga.nicovideo.jp/seiga/im4937663") + ] + end + + should "normalize" do + assert_equal("http://seiga.nicovideo.jp/user/illust/7017777", @urls[0].normalized_url) + assert_equal("http://seiga.nicovideo.jp/user/illust/7017777", @urls[1].normalized_url) + assert_equal("http://seiga.nicovideo.jp/user/illust/7017777", @urls[2].normalized_url) + end + end + should "normalize fc2 urls" do url = FactoryBot.create(:artist_url, :url => "http://blog55.fc2.com/monet") assert_equal("http://blog55.fc2.com/monet", url.url) @@ -56,13 +106,13 @@ class ArtistUrlTest < ActiveSupport::TestCase end should "normalize deviant art artist urls" do - url = FactoryBot.create(:artist_url, :url => "https://caidychen.deviantart.com/") - assert_equal("http://www.deviantart.com/caidychen/", url.normalized_url) + url = FactoryBot.create(:artist_url, :url => "https://www.deviantart.com/aeror404/art/Holiday-Elincia-424551484") + assert_equal("http://www.deviantart.com/aeror404/", url.normalized_url) end should "normalize nico seiga artist urls" do - url = FactoryBot.create(:artist_url, :url => "http://seiga.nicovideo.jp/user/illust/1826959") - assert_equal("http://seiga.nicovideo.jp/user/illust/1826959/", url.normalized_url) + url = FactoryBot.create(:artist_url, :url => "http://seiga.nicovideo.jp/user/illust/7017777") + assert_equal("http://seiga.nicovideo.jp/user/illust/7017777/", url.normalized_url) url = FactoryBot.create(:artist_url, :url => "http://seiga.nicovideo.jp/seiga/im4937663") assert_equal("http://seiga.nicovideo.jp/user/illust/7017777/", url.normalized_url) @@ -80,9 +130,9 @@ class ArtistUrlTest < ActiveSupport::TestCase end should "normalize twitter urls" do - url = FactoryBot.create(:artist_url, :url => "https://twitter.com/MONET/status/12345") - assert_equal("https://twitter.com/MONET/status/12345", url.url) - assert_equal("http://twitter.com/monet/status/12345/", url.normalized_url) + url = FactoryBot.create(:artist_url, :url => "https://twitter.com/aoimanabu/status/892370963630743552") + assert_equal("https://twitter.com/aoimanabu/status/892370963630743552", url.url) + assert_equal("http://twitter.com/aoimanabu/", url.normalized_url) end end end diff --git a/test/unit/downloads/art_station_test.rb b/test/unit/downloads/art_station_test.rb index 528210bdb..e97bd7e73 100644 --- a/test/unit/downloads/art_station_test.rb +++ b/test/unit/downloads/art_station_test.rb @@ -4,31 +4,35 @@ module Downloads class ArtStationTest < ActiveSupport::TestCase context "a download for a (small) artstation image" do setup do - @source = "https://cdnb3.artstation.com/p/assets/images/images/003/716/071/large/aoi-ogata-hate-city.jpg?1476754974" - @download = Downloads::File.new(@source) + @asset = "https://cdnb3.artstation.com/p/assets/images/images/003/716/071/small/aoi-ogata-hate-city.jpg?1476754974" + @download = Downloads::File.new(@asset) end should "download the large image instead" do - assert_equal("https://cdnb3.artstation.com/p/assets/images/images/003/716/071/large/aoi-ogata-hate-city.jpg?1476754974", @download.source) + file, strategy = @download.download! + assert_equal(517_706, ::File.size(file.path)) end end context "for an image where an original does not exist" do setup do - @source = "https://cdna.artstation.com/p/assets/images/images/004/730/278/large/mendel-oh-dragonll.jpg" - @download = Downloads::File.new(@source) - @download.download! + @asset = "https://cdna.artstation.com/p/assets/images/images/004/730/278/large/mendel-oh-dragonll.jpg" + @download = Downloads::File.new(@asset) end should "not try to download the original" do - assert_equal("https://cdna.artstation.com/p/assets/images/images/004/730/278/large/mendel-oh-dragonll.jpg", @download.source) + file, strategy = @download.download! + assert_equal(449_047, ::File.size(file.path)) end end context "a download for an ArtStation image hosted on CloudFlare" do + setup do + @asset = "https://cdnb.artstation.com/p/assets/images/images/003/716/071/large/aoi-ogata-hate-city.jpg?1476754974" + end + should "return the original file, not the polished file" do - @source = "https://cdnb.artstation.com/p/assets/images/images/003/716/071/large/aoi-ogata-hate-city.jpg?1476754974" - assert_downloaded(517_706, @source) # polished size: 502_052 + assert_downloaded(517_706, @asset) # polished size: 502_052 end end @@ -36,11 +40,12 @@ module Downloads setup do @source = "https://dantewontdie.artstation.com/projects/YZK5q" @download = Downloads::File.new(@source) - @download.download! end should "download the original image instead" do - assert_equal("https://cdna.artstation.com/p/assets/images/images/006/066/534/large/yinan-cui-reika.jpg?1495781565", @download.source) + file, strategy = @download.download! + + assert_equal(237_651, ::File.size(file.path)) end end end diff --git a/test/unit/downloads/deviant_art_test.rb b/test/unit/downloads/deviant_art_test.rb index 729f947dd..d35433b92 100644 --- a/test/unit/downloads/deviant_art_test.rb +++ b/test/unit/downloads/deviant_art_test.rb @@ -8,11 +8,7 @@ module Downloads @source = "http://starbitt.deviantart.com/art/09271X-636962118" @download = Downloads::File.new(@source) - @tempfile = @download.download! - end - - should "set the html page as the source" do - assert_equal("https://orig00.deviantart.net/82ef/f/2016/271/7/1/aaaaaa_by_starbitt-daj8b46.gif", @download.source) + @tempfile, strategy = @download.download! end should "work" do diff --git a/test/unit/downloads/file_test.rb b/test/unit/downloads/file_test.rb index 8d53a6552..885ef6bd6 100644 --- a/test/unit/downloads/file_test.rb +++ b/test/unit/downloads/file_test.rb @@ -41,7 +41,7 @@ module Downloads end should "store the file in the tempfile path" do - tempfile = @download.download! + tempfile, strategy = @download.download! assert_equal(@source, @download.source) assert_operator(tempfile.size, :>, 0, "should have data") end diff --git a/test/unit/downloads/pixiv_test.rb b/test/unit/downloads/pixiv_test.rb index d19bd67e5..25c4e2248 100644 --- a/test/unit/downloads/pixiv_test.rb +++ b/test/unit/downloads/pixiv_test.rb @@ -4,6 +4,7 @@ module Downloads class PixivTest < ActiveSupport::TestCase def setup super + Downloads::File.stubs(:is_cloudflare?).returns(false) load_pixiv_tokens! end @@ -13,29 +14,6 @@ module Downloads end context "in all cases" do - # Test an old illustration (one uploaded before 2014-09-16). New - # /img-original/ and /img-master/ URLs currently don't work for images - # uploaded before this date. Only old /imgXX/img/username/ URLs work. - context "downloading an old PNG illustration" do - setup do - @medium_page = "http://www.pixiv.net/member_illust.php?mode=medium&illust_id=14901720" - @big_page = "http://www.pixiv.net/member_illust.php?mode=big&illust_id=14901720" - - @new_small_thumbnail = "http://i1.pixiv.net/c/150x150/img-master/img/2010/11/30/08/39/58/14901720_p0_master1200.jpg" - @new_medium_thumbnail = "http://i1.pixiv.net/c/600x600/img-master/img/2010/11/30/08/39/58/14901720_p0_master1200.jpg" - @new_full_size_image = "http://i1.pixiv.net/img-original/img/2010/11/30/08/39/58/14901720_p0.png" - - @file_size = 1261 - end - - should "work when using new URLs" do - # Don't know the actual file size of the thumbnails since they don't work. - assert_downloaded(1083, @new_small_thumbnail) - assert_downloaded(1083, @new_medium_thumbnail) - assert_downloaded(@file_size, @new_full_size_image) - end - end - # Test a new illustration (one uploaded after 2014-09-30). New illustrations # must use /img-original/ for full size URLs. Old /imgXX/img/username/ style URLs # don't work for images uploaded after this date. @@ -103,21 +81,6 @@ module Downloads end end - context "downloading a bad id image" do - setup do - @bad_id_full = "https://i.pximg.net/img-original/img/2017/11/22/01/06/44/65991677_p0.png" - @bad_id_sample = "https://i.pximg.net/c/600x600/img-master/img/2017/11/22/01/06/44/65991677_p0_master1200.jpg" - end - - should "not raise an error when rewriting the url" do - assert_nothing_raised { assert_not_rewritten(@bad_id_full) } - end - - should_eventually "rewrite bad id samples to full size" do - assert_rewritten(@bad_id_full, @bad_id_sample) - end - end - context "downloading a ugoira" do setup do @medium_page = "http://www.pixiv.net/member_illust.php?mode=medium&illust_id=62247364" @@ -138,6 +101,8 @@ module Downloads context "downloading a profile image" do should "download new profile images" do + skip "profile images are no longer supported" + @file_url = "https://i.pximg.net/user-profile/img/2014/12/18/10/31/23/8733472_7dc7310db6cc37163af145d04499e411_170.jpg" @file_size = 23_328 @@ -149,8 +114,10 @@ module Downloads context "downloading a background image" do should "download the image" do - @file_url = "http://i1.pixiv.net/background/img/2016/05/17/12/05/48/2074388_d4ac52034f7ca0af3e083d59fde7e97f.jpg" - @file_size = 386_678 + skip "background images are no longer supported" + + @file_url = "https://i.pximg.net/background/img/2015/10/25/08/45/27/198128_77ddf78cdb162e3d1c0d5134af185813.jpg" + @file_size = 0 assert_not_rewritten(@file_url) assert_downloaded(@file_size, @file_url) @@ -159,21 +126,23 @@ module Downloads context "downloading a novel image" do should "download new novel images" do - @file_url = "http://i1.pixiv.net/novel-cover-original/img/2016/11/03/20/10/58/7436075_f75af69f3eacd1656d3733c72aa959cf.jpg" - @file_size = 316_311 + @file_url = "https://i.pximg.net/novel-cover-original/img/2017/07/27/23/14/17/8465454_80685d10e6df4d7d53ad347ddc18a36b.jpg" + @ref = 'https://www.pixiv.net/novel/show.php?id=8465454&mode=cover' + @file_size = 532_129 - assert_not_rewritten(@file_url) - assert_downloaded(@file_size, @file_url) + assert_not_rewritten(@file_url, @ref) + assert_downloaded(@file_size, @file_url, @ref) end end context "downloading a pixiv fanbox image" do should "work" do - @file_url = "https://fanbox.pixiv.net/images/post/31757/w/1200/0CdXtgr4al3t43gQG4NZLnpQ.jpeg" - @file_size = 200_239 + @source = "https://www.pixiv.net/fanbox/creator/12491073/post/82406" + @file_url = "https://fanbox.pixiv.net/images/post/82406/D833IKA7FIesJXL8xx39rrG0.jpeg" + @file_size = 873_387 - assert_not_rewritten(@file_url) - assert_downloaded(@file_size, @file_url) + assert_not_rewritten(@file_url, @source) + assert_downloaded(@file_size, @file_url, @source) end end end @@ -181,12 +150,11 @@ module Downloads context "An ugoira site for pixiv" do setup do @download = Downloads::File.new("http://www.pixiv.net/member_illust.php?mode=medium&illust_id=62247364") - @tempfile = @download.download! + @tempfile, strategy = @download.download! @tempfile.close! end should "capture the data" do - assert_equal("https://i.pximg.net/img-zip-ugoira/img/2017/04/04/08/57/38/62247364_ugoira1920x1080.zip", @download.source) assert_equal(2, @download.data[:ugoira_frame_data].size) if @download.data[:ugoira_frame_data][0]["file"] assert_equal([{"file"=>"000000.jpg", "delay"=>125}, {"file"=>"000001.jpg", "delay"=>125}], @download.data[:ugoira_frame_data]) diff --git a/test/unit/downloads/tumblr_test.rb b/test/unit/downloads/tumblr_test.rb index adfbe7881..a5b2bd17e 100644 --- a/test/unit/downloads/tumblr_test.rb +++ b/test/unit/downloads/tumblr_test.rb @@ -2,43 +2,52 @@ require 'test_helper' module Downloads class TumblrTest < ActiveSupport::TestCase + # Currently there's no way to obtain the raw version of these images, + # so we have to change the tests to validate against the 1280 version + context "a download for a tumblr 500 sample" do - should "instead download the raw version" do + should "instead download the 1280 version" do skip "Tumblr keys are not set" unless Danbooru.config.tumblr_consumer_key + @ref = "https://noizave.tumblr.com/post/162206271767" @source = "https://24.media.tumblr.com/fc328250915434e66e8e6a92773f79d0/tumblr_mf4nshfibc1s0oswoo1_500.jpg" - @rewrite = "http://data.tumblr.com/fc328250915434e66e8e6a92773f79d0/tumblr_mf4nshfibc1s0oswoo1_raw.jpg" - assert_rewritten(@rewrite, @source) - assert_downloaded(196_617, @source) + @rewrite = "https://media.tumblr.com/fc328250915434e66e8e6a92773f79d0/tumblr_mf4nshfibc1s0oswoo1_1280.jpg" + assert_rewritten(@rewrite, @source, @ref) + assert_downloaded(113909, @source, @ref) + # assert_downloaded(196_617, @source) end end context "a download for a *.media.tumblr.com/tumblr_$id_$size image without a larger size" do should "download the same version" do skip "Tumblr keys are not set" unless Danbooru.config.tumblr_consumer_key + @ref = "https://noizave.tumblr.com/post/162206271767" @source = "https://25.media.tumblr.com/tumblr_lxbzel2H5y1r9yjhso1_500.jpg" - @rewrite = "http://data.tumblr.com/tumblr_lxbzel2H5y1r9yjhso1_500.jpg" - assert_rewritten(@rewrite, @source) - assert_downloaded(90_122, @source) + @rewrite = "https://media.tumblr.com/tumblr_lxbzel2H5y1r9yjhso1_1280.jpg" + assert_rewritten(@rewrite, @source, @ref) + assert_downloaded(41803, @source, @ref) + # assert_downloaded(90_122, @source) end end context "a download for a *.media.tumblr.com/tumblr_$id_$size image with a larger size" do should "download the best available version" do skip "Tumblr keys are not set" unless Danbooru.config.tumblr_consumer_key + @ref = "https://noizave.tumblr.com/post/162206271767" @source = "https://25.media.tumblr.com/tumblr_m2dxb8aOJi1rop2v0o1_500.png" - @rewrite = "http://data.tumblr.com/tumblr_m2dxb8aOJi1rop2v0o1_1280.png" - assert_rewritten(@rewrite, @source) - assert_downloaded(34_060, @source) + @rewrite = "https://media.tumblr.com/tumblr_m2dxb8aOJi1rop2v0o1_1280.png" + assert_rewritten(@rewrite, @source, @ref) + assert_downloaded(62658, @source, @ref) end end context "a download for a *.media.tumblr.com/$hash/tumblr_$id_rN_$size image" do should "download the best available version" do skip "Tumblr keys are not set" unless Danbooru.config.tumblr_consumer_key + @ref = "https://noizave.tumblr.com/post/162206271767" @source = "https://33.media.tumblr.com/4b7fecf9a5a8284fbaefb051a2369b55/tumblr_npozqfwc9h1rt6u7do1_r1_500.gif" - @rewrite = "http://data.tumblr.com/4b7fecf9a5a8284fbaefb051a2369b55/tumblr_npozqfwc9h1rt6u7do1_r1_raw.gif" - assert_rewritten(@rewrite, @source) - assert_downloaded(1_234_017, @source) + @rewrite = "https://media.tumblr.com/4b7fecf9a5a8284fbaefb051a2369b55/tumblr_npozqfwc9h1rt6u7do1_r1_1280.gif" + assert_rewritten(@rewrite, @source, @ref) + assert_downloaded(1_234_017, @source, @ref) end end @@ -46,40 +55,33 @@ module Downloads should "download the best available version" do skip "Tumblr keys are not set" unless Danbooru.config.tumblr_consumer_key @source = "https://68.media.tumblr.com/ee02048f5578595badc95905e17154b4/tumblr_inline_ofbr4452601sk4jd9_250.gif" - @rewrite = "http://data.tumblr.com/ee02048f5578595badc95905e17154b4/tumblr_inline_ofbr4452601sk4jd9_500.gif" - assert_rewritten(@rewrite, @source) - assert_downloaded(110_348, @source) + @rewrite = "https://media.tumblr.com/ee02048f5578595badc95905e17154b4/tumblr_inline_ofbr4452601sk4jd9_1280.gif" + assert_rewritten(@rewrite, @source, @ref) + assert_downloaded(110_348, @source, @ref) end end - context "a download for a data.tumblr.com/$id_$size image with a larger size" do + context "a download for a media.tumblr.com/$id_$size image with a larger size" do should "download the best available version" do skip "Tumblr keys are not set" unless Danbooru.config.tumblr_consumer_key - @source = "http://data.tumblr.com/0DNBGJovY5j3smfeQs8nB53z_400.jpg" - @rewrite = "http://data.tumblr.com/0DNBGJovY5j3smfeQs8nB53z_500.jpg" - assert_rewritten(@rewrite, @source) - assert_downloaded(153_885, @source) + @ref = "https://noizave.tumblr.com/post/162206271767" + @source = "http://media.tumblr.com/0DNBGJovY5j3smfeQs8nB53z_400.jpg" + @rewrite = "https://media.tumblr.com/0DNBGJovY5j3smfeQs8nB53z_1280.jpg" + assert_rewritten(@rewrite, @source, @ref) + assert_downloaded(122413, @source) + # assert_downloaded(153_885, @source) end end - context "a download for a data.tumblr.com/tumblr_$id_$size.jpg image" do + context "a download for a media.tumblr.com/tumblr_$id_$size.jpg image" do should "download the best available version" do skip "Tumblr keys are not set" unless Danbooru.config.tumblr_consumer_key - @source = "http://data.tumblr.com/tumblr_m24kbxqKAX1rszquso1_250.jpg" - @rewrite = "http://data.tumblr.com/tumblr_m24kbxqKAX1rszquso1_1280.jpg" - assert_rewritten(@rewrite, @source) - assert_downloaded(296_399, @source) - end - end - - context "a download for a gs1.wac.edgecastcdn.net image" do - should "rewrite to the full tumblr version" do - skip "Tumblr keys are not set" unless Danbooru.config.tumblr_consumer_key - @source = "https://gs1.wac.edgecastcdn.net/8019B6/data.tumblr.com/tumblr_m2dxb8aOJi1rop2v0o1_500.png" - @rewrite = "http://data.tumblr.com/tumblr_m2dxb8aOJi1rop2v0o1_1280.png" - - assert_downloaded(34_060, @source) - assert_rewritten(@rewrite, @source) + @ref = "https://noizave.tumblr.com/post/162206271767" + @source = "http://media.tumblr.com/tumblr_m24kbxqKAX1rszquso1_250.jpg" + @rewrite = "https://media.tumblr.com/tumblr_m24kbxqKAX1rszquso1_1280.jpg" + assert_rewritten(@rewrite, @source, @ref) + assert_downloaded(101869, @source, @ref) + # assert_downloaded(296_399, @source) end end @@ -87,9 +89,9 @@ module Downloads should "download the best available version" do skip "Tumblr keys are not set" unless Danbooru.config.tumblr_consumer_key @source = "https://noizave.tumblr.com/post/162206271767" - @rewrite = "http://data.tumblr.com/3bbfcbf075ddf969c996641b264086fd/tumblr_os2buiIOt51wsfqepo1_raw.png" + @rewrite = "https://media.tumblr.com/3bbfcbf075ddf969c996641b264086fd/tumblr_os2buiIOt51wsfqepo1_1280.png" - assert_downloaded(3_620, @source) + assert_downloaded(3655, @source) assert_rewritten(@rewrite, @source) end end diff --git a/test/unit/downloads/twitter_test.rb b/test/unit/downloads/twitter_test.rb index 0aee27c30..277384247 100644 --- a/test/unit/downloads/twitter_test.rb +++ b/test/unit/downloads/twitter_test.rb @@ -8,17 +8,19 @@ module Downloads @source = "https://twitter.com/CincinnatiZoo/status/859073537713328129" @rewrite = "https://video.twimg.com/ext_tw_video/859073467769126913/pu/vid/1280x720/cPGgVROXHy3yrK6u.mp4" assert_rewritten(@rewrite, @source) - assert_downloaded(8_602_983, @source) + + # this takes awhile so just skip it unless we really want to test it + # assert_downloaded(8_602_983, @source) end end context "downloading a 'https://twitter.com/:user/status/:id/photo/:n' card url" do should "download the orig file" do skip "Twitter key is not set" unless Danbooru.config.twitter_api_key - @source = "https://twitter.com/masayasuf/status/870734961778630656/photo/1" - @rewrite = "https://pbs.twimg.com/media/DBV40M2UIAAHYlt.jpg:orig" + @source = "https://twitter.com/ry_o_ta_/status/1024316791688843269/photo/1" + @rewrite = "https://pbs.twimg.com/media/Djcar72VsAAZsGa.jpg:orig" assert_rewritten(@rewrite, @source) - assert_downloaded(788_206, @source) + assert_downloaded(103812, @source) end end @@ -37,8 +39,9 @@ module Downloads skip "Twitter key is not set" unless Danbooru.config.twitter_api_key @source = "https://pbs.twimg.com/media/B4HSEP5CUAA4xyu.png:large" @rewrite = "https://pbs.twimg.com/media/B4HSEP5CUAA4xyu.png:orig" - assert_rewritten(@rewrite, @source) - assert_downloaded(9800, @source) + @ref = "https://twitter.com/nounproject/status/540944400767922176" + assert_rewritten(@rewrite, @source, @ref) + assert_downloaded(9800, @source, @ref) end end end diff --git a/test/unit/post_replacement_test.rb b/test/unit/post_replacement_test.rb deleted file mode 100644 index 5a20bbbf3..000000000 --- a/test/unit/post_replacement_test.rb +++ /dev/null @@ -1,37 +0,0 @@ -require 'test_helper' - -class PostReplacementTest < ActiveSupport::TestCase - def setup - super - - mock_iqdb_service! - Delayed::Worker.delay_jobs = true # don't delete the old images right away - - @system = FactoryBot.create(:user, created_at: 2.weeks.ago) - User.stubs(:system).returns(@system) - - @uploader = FactoryBot.create(:user, created_at: 2.weeks.ago, can_upload_free: true) - @replacer = FactoryBot.create(:user, created_at: 2.weeks.ago, can_approve_posts: true) - CurrentUser.user = @replacer - CurrentUser.ip_addr = "127.0.0.1" - end - - def teardown - super - - CurrentUser.user = nil - CurrentUser.ip_addr = nil - Delayed::Worker.delay_jobs = false - end - - context "Replacing" do - setup do - CurrentUser.scoped(@uploader, "127.0.0.2") do - attributes = FactoryBot.attributes_for(:jpg_upload, as_pending: "0", tag_string: "lowres tag1") - service = UploadService.new(attributes) - upload = service.start! - @post = upload.post - end - end - end -end diff --git a/test/unit/post_test.rb b/test/unit/post_test.rb index 73a5c5a08..e105ea7f8 100644 --- a/test/unit/post_test.rb +++ b/test/unit/post_test.rb @@ -1480,26 +1480,6 @@ class PostTest < ActiveSupport::TestCase assert_equal(18557054, @post.pixiv_id) @post.pixiv_id = nil end - - context "but doesn't have a pixiv id" do - should "save the pixiv id" do - @post.pixiv_id = 1234 - @post.update(source: "http://i1.pixiv.net/novel-cover-original/img/2016/11/03/20/10/58/7436075_f75af69f3eacd1656d3733c72aa959cf.jpg") - assert_nil(@post.pixiv_id) - - @post.pixiv_id = 1234 - @post.update(source: "http://i2.pixiv.net/background/img/2016/10/30/12/27/30/7059005_da9946b806c10d391a81ed1117cd33d6.jpg") - assert_nil(@post.pixiv_id) - - @post.pixiv_id = 1234 - @post.update(source: "http://i1.pixiv.net/img15/img/omega777/novel/2612734.jpg") - assert_nil(@post.pixiv_id) - - @post.pixiv_id = 1234 - @post.update(source: "http://img08.pixiv.net/profile/nice/1408837.jpg") - assert_nil(@post.pixiv_id) - end - end end should "normalize pixiv links" do diff --git a/test/unit/sources/art_station_test.rb b/test/unit/sources/art_station_test.rb index c6937ab61..24e5db125 100644 --- a/test/unit/sources/art_station_test.rb +++ b/test/unit/sources/art_station_test.rb @@ -4,8 +4,7 @@ module Sources class ArtStationTest < ActiveSupport::TestCase context "The source site for an art station artwork page" do setup do - @site = Sources::Site.new("https://www.artstation.com/artwork/04XA4") - @site.get + @site = Sources::Strategies.find("https://www.artstation.com/artwork/04XA4") end should "get the image url" do @@ -32,8 +31,7 @@ module Sources context "The source site for an art station projects page" do setup do - @site = Sources::Site.new("https://dantewontdie.artstation.com/projects/YZK5q") - @site.get + @site = Sources::Strategies.find("https://dantewontdie.artstation.com/projects/YZK5q") end should "get the image url" do @@ -61,8 +59,7 @@ module Sources context "The source site for a www.artstation.com/artwork/$slug page" do setup do - @site = Sources::Site.new("https://www.artstation.com/artwork/cody-from-sf") - @site.get + @site = Sources::Strategies.find("https://www.artstation.com/artwork/cody-from-sf") end should "get the image url" do @@ -75,8 +72,7 @@ module Sources setup do @url = "https://cdna.artstation.com/p/assets/images/images/006/029/978/large/amama-l-z.jpg" @ref = "https://www.artstation.com/artwork/4BWW2" - @site = Sources::Site.new(@url, referer_url: @ref) - @site.get + @site = Sources::Strategies.find(@url, @ref) end should "fetch the source data" do @@ -86,8 +82,7 @@ module Sources context "The source site for an ArtStation gallery" do setup do - @site = Sources::Site.new("https://www.artstation.com/artwork/BDxrA") - @site.get + @site = Sources::Strategies.find("https://www.artstation.com/artwork/BDxrA") end should "get only image urls, not video urls" do diff --git a/test/unit/sources/deviantart_test.rb b/test/unit/sources/deviantart_test.rb index 1098d26e2..e77155efe 100644 --- a/test/unit/sources/deviantart_test.rb +++ b/test/unit/sources/deviantart_test.rb @@ -9,8 +9,7 @@ module Sources context "A path-based artist url" do setup do - @site = Sources::Site.new("https://www.deviantart.com/aeror404/art/Holiday-Elincia-424551484") - @site.get + @site = Sources::Strategies.find("https://www.deviantart.com/aeror404/art/Holiday-Elincia-424551484") end should "work" do @@ -20,8 +19,7 @@ module Sources context "The source for a private DeviantArt image URL" do setup do - @site = Sources::Site.new("https://pre00.deviantart.net/423b/th/pre/i/2017/281/e/0/mindflayer_girl01_by_nickbeja-dbpxdt8.png") - @site.get + @site = Sources::Strategies.find("https://pre00.deviantart.net/423b/th/pre/i/2017/281/e/0/mindflayer_girl01_by_nickbeja-dbpxdt8.png") end should "work" do @@ -31,25 +29,24 @@ module Sources context "The source for a download-disabled DeviantArt artwork page" do should "get the image url" do - @site = Sources::Site.new("https://noizave.deviantart.com/art/test-no-download-697415967") + @site = Sources::Strategies.find("https://noizave.deviantart.com/art/test-no-download-697415967") assert_equal(["https://img00.deviantart.net/56ee/i/2017/219/2/3/test__no_download_by_noizave-dbj81lr.jpg"], @site.image_urls) end end context "The source for a DeviantArt image url" do should "fetch the source data" do - @site = Sources::Site.new("https://pre00.deviantart.net/b5e6/th/pre/f/2016/265/3/5/legend_of_galactic_heroes_by_hideyoshi-daihpha.jpg") + @site = Sources::Strategies.find("https://pre00.deviantart.net/b5e6/th/pre/f/2016/265/3/5/legend_of_galactic_heroes_by_hideyoshi-daihpha.jpg") assert_equal("hideyoshi", @site.artist_name) - assert_equal("https://hideyoshi.deviantart.com", @site.profile_url) - assert_equal("https://orig00.deviantart.net/9e1f/f/2016/265/3/5/legend_of_galactic_heroes_by_hideyoshi-daihpha.jpg", @site.image_url) + assert_equal("https://www.deviantart.com/hideyoshi", @site.profile_url) + assert_equal("https://pre00.deviantart.net/b5e6/th/pre/f/2016/265/3/5/legend_of_galactic_heroes_by_hideyoshi-daihpha.jpg", @site.image_url) end end context "The source for an DeviantArt artwork page" do setup do - @site = Sources::Site.new("http://noizave.deviantart.com/art/test-post-please-ignore-685436408") - @site.get + @site = Sources::Strategies.find("http://noizave.deviantart.com/art/test-post-please-ignore-685436408") end should "get the image url" do @@ -107,8 +104,7 @@ module Sources context "The source for a login-only DeviantArt artwork page" do setup do - @site = Sources::Site.new("http://noizave.deviantart.com/art/hidden-work-685458369") - @site.get + @site = Sources::Strategies.find("http://noizave.deviantart.com/art/hidden-work-685458369") end should "get the image url" do @@ -118,8 +114,7 @@ module Sources context "A source with malformed links in the artist commentary" do should "fix the links" do - @site = Sources::Site.new("https://teemutaiga.deviantart.com/art/Kisu-620666655") - @site.get + @site = Sources::Strategies.find("https://teemutaiga.deviantart.com/art/Kisu-620666655") assert_match(%r!"Print available at Inprnt":\[http://www.inprnt.com/gallery/teemutaiga/kisu\]!, @site.dtext_artist_commentary_desc) end diff --git a/test/unit/sources/nico_seiga_test.rb b/test/unit/sources/nico_seiga_test.rb index f5f70e501..f836bc550 100644 --- a/test/unit/sources/nico_seiga_test.rb +++ b/test/unit/sources/nico_seiga_test.rb @@ -4,11 +4,8 @@ module Sources class NicoSeigaTest < ActiveSupport::TestCase context "The source site for nico seiga" do setup do - @site_1 = Sources::Site.new("http://lohas.nicoseiga.jp/o/910aecf08e542285862954017f8a33a8c32a8aec/1433298801/4937663") - @site_1.get - - @site_2 = Sources::Site.new("http://seiga.nicovideo.jp/seiga/im4937663") - @site_2.get + @site_1 = Sources::Strategies.find("http://lohas.nicoseiga.jp/o/910aecf08e542285862954017f8a33a8c32a8aec/1433298801/4937663") + @site_2 = Sources::Strategies.find("http://seiga.nicovideo.jp/seiga/im4937663") end should "get the profile" do @@ -34,11 +31,11 @@ module Sources should "get the tags" do assert(@site_1.tags.size > 0) first_tag = @site_1.tags.first - assert_equal(["アニメ", "http://seiga.nicovideo.jp/tag/%E3%82%A2%E3%83%8B%E3%83%A1"], first_tag) + assert_equal(["アニメ", "https://seiga.nicovideo.jp/tag/%E3%82%A2%E3%83%8B%E3%83%A1"], first_tag) assert(@site_2.tags.size > 0) first_tag = @site_2.tags.first - assert_equal(["アニメ", "http://seiga.nicovideo.jp/tag/%E3%82%A2%E3%83%8B%E3%83%A1"], first_tag) + assert_equal(["アニメ", "https://seiga.nicovideo.jp/tag/%E3%82%A2%E3%83%8B%E3%83%A1"], first_tag) end should "convert a page into a json representation" do @@ -51,8 +48,7 @@ module Sources end should "work for a https://lohas.nicoseiga.jp/thumb/${id}i url" do - site = Sources::Site.new("https://lohas.nicoseiga.jp/thumb/6844226i") - site.get + site = Sources::Strategies.find("https://lohas.nicoseiga.jp/thumb/6844226i") full_image_url = %r!https?://lohas.nicoseiga.jp/priv/[a-f0-9]{40}/[0-9]+/6844226! assert_match(full_image_url, site.image_url) diff --git a/test/unit/sources/nijie_test.rb b/test/unit/sources/nijie_test.rb index a74772e54..4815782d7 100644 --- a/test/unit/sources/nijie_test.rb +++ b/test/unit/sources/nijie_test.rb @@ -7,9 +7,7 @@ module Sources CurrentUser.user = FactoryBot.create(:user) CurrentUser.ip_addr = "127.0.0.1" - @site = Sources::Site.new("http://nijie.info/view.php?id=213043") - @site.get - sleep(5) + @site = Sources::Strategies.find("https://nijie.info/view.php?id=213043") end should "get the image url" do @@ -17,7 +15,7 @@ module Sources end should "get the profile" do - assert_equal("http://nijie.info/members.php?id=728995", @site.profile_url) + assert_equal("https://nijie.info/members.php?id=728995", @site.profile_url) end should "get the artist name" do @@ -25,15 +23,14 @@ module Sources end should "get the tags" do - assert_equal([["眼鏡", "http://nijie.info/search.php?word=%E7%9C%BC%E9%8F%A1"], ["リトルウィッチアカデミア", "http://nijie.info/search.php?word=%E3%83%AA%E3%83%88%E3%83%AB%E3%82%A6%E3%82%A3%E3%83%83%E3%83%81%E3%82%A2%E3%82%AB%E3%83%87%E3%83%9F%E3%82%A2"], ["アーシュラ先生", "http://nijie.info/search.php?word=%E3%82%A2%E3%83%BC%E3%82%B7%E3%83%A5%E3%83%A9%E5%85%88%E7%94%9F"]], @site.tags) + assert_equal([["眼鏡", "https://nijie.info/search.php?word=%E7%9C%BC%E9%8F%A1"], ["リトルウィッチアカデミア", "https://nijie.info/search.php?word=%E3%83%AA%E3%83%88%E3%83%AB%E3%82%A6%E3%82%A3%E3%83%83%E3%83%81%E3%82%A2%E3%82%AB%E3%83%87%E3%83%9F%E3%82%A2"], ["アーシュラ先生", "https://nijie.info/search.php?word=%E3%82%A2%E3%83%BC%E3%82%B7%E3%83%A5%E3%83%A9%E5%85%88%E7%94%9F"]], @site.tags) end should "normalize ()characters in tags" do FactoryBot.create(:tag, :name => "kaga") FactoryBot.create(:wiki_page, :title => "kaga", :other_names => "加賀(艦これ)") - @site = Sources::Site.new("http://nijie.info/view.php?id=208316") - @site.get + @site = Sources::Strategies.find("https://nijie.info/view.php?id=208316") assert_includes(@site.tags.map(&:first), "加賀(艦これ)") assert_includes(@site.translated_tags.map(&:first), "kaga") @@ -50,16 +47,15 @@ module Sources context "The source site for a nijie referer url" do setup do - @site = Sources::Site.new("http://pic03.nijie.info/nijie_picture/728995_20170505014820_0.jpg", referer_url: "https://nijie.info/view_popup.php?id=213043") - @site.get + @site = Sources::Strategies.find("http://pic03.nijie.info/nijie_picture/728995_20170505014820_0.jpg", "https://nijie.info/view_popup.php?id=213043") end should "get the image url" do - assert_equal("https://pic03.nijie.info/nijie_picture/728995_20170505014820_0.jpg", @site.image_url) + assert_equal("http://pic03.nijie.info/nijie_picture/728995_20170505014820_0.jpg", @site.image_url) end should "get the profile" do - assert_equal("http://nijie.info/members.php?id=728995", @site.profile_url) + assert_equal("https://nijie.info/members.php?id=728995", @site.profile_url) end should "get the artist name" do @@ -69,8 +65,7 @@ module Sources context "The source site for a nijie popup" do setup do - @site = Sources::Site.new("https://nijie.info/view_popup.php?id=213043") - @site.get + @site = Sources::Strategies.find("https://nijie.info/view_popup.php?id=213043") end should "get the image url" do @@ -78,7 +73,7 @@ module Sources end should "get the profile" do - assert_equal("http://nijie.info/members.php?id=728995", @site.profile_url) + assert_equal("https://nijie.info/members.php?id=728995", @site.profile_url) end should "get the artist name" do @@ -88,8 +83,7 @@ module Sources context "The source site for a nijie gallery" do setup do - @site = Sources::Site.new("http://nijie.info/view.php?id=218856") - @site.get + @site = Sources::Strategies.find("https://nijie.info/view.php?id=218856") end should "get the image urls" do diff --git a/test/unit/sources/pawoo_test.rb b/test/unit/sources/pawoo_test.rb index f5a1a30a5..9fd569f26 100644 --- a/test/unit/sources/pawoo_test.rb +++ b/test/unit/sources/pawoo_test.rb @@ -5,8 +5,7 @@ module Sources context "The source site for a https://pawoo.net/web/status/$id url" do setup do skip "Pawoo keys not set" unless Danbooru.config.pawoo_client_id - @site = Sources::Site.new("https://pawoo.net/web/statuses/1202176") - @site.get + @site = Sources::Strategies.find("https://pawoo.net/web/statuses/1202176") end should "get the profile" do @@ -35,8 +34,7 @@ module Sources context "The source site for a https://pawoo.net/$user/$id url" do setup do skip "Pawoo keys not set" unless Danbooru.config.pawoo_client_id - @site = Sources::Site.new("https://pawoo.net/@evazion/19451018") - @site.get + @site = Sources::Strategies.find("https://pawoo.net/@evazion/19451018") end should "get the profile" do @@ -89,8 +87,7 @@ module Sources skip "Pawoo keys not set" unless Danbooru.config.pawoo_client_id @url = "https://img.pawoo.net/media_attachments/files/001/298/028/original/55a6fd252778454b.mp4" @ref = "https://pawoo.net/@evazion/19451018" - @site = Sources::Site.new(@url, referer_url: @ref) - @site.get + @site = Sources::Strategies.find(@url, @ref) end should "fetch the source data" do diff --git a/test/unit/sources/pixiv_test.rb b/test/unit/sources/pixiv_test.rb index 7a2946b03..ce9e967aa 100644 --- a/test/unit/sources/pixiv_test.rb +++ b/test/unit/sources/pixiv_test.rb @@ -3,8 +3,8 @@ require 'test_helper' module Sources class PixivTest < ActiveSupport::TestCase def get_source(source) - @site = Sources::Site.new(source) - @site.get + @site = Sources::Strategies.find(source) + @site rescue Net::OpenTimeout skip "Remote connection to #{source} failed" @@ -23,19 +23,22 @@ module Sources context "in all cases" do context "A touch page" do setup do - @site = Sources::Site.new("http://touch.pixiv.net/member_illust.php?mode=medium&illust_id=59687915") - @image_urls = @site.get + @site = Sources::Strategies.find("http://touch.pixiv.net/member_illust.php?mode=medium&illust_id=59687915") + @image_urls = @site.image_urls end should "get all the image urls" do - assert_equal("https://i.pximg.net/img-original/img/2016/10/29/17/13/23/59687915_p0.png", @image_urls) + expected_urls = [ + "https://i.pximg.net/img-original/img/2016/10/29/17/13/23/59687915_p0.png", + "https://i.pximg.net/img-original/img/2016/10/29/17/13/23/59687915_p1.png" + ].sort + assert_equal(expected_urls, @image_urls.sort) end end context "A gallery page" do setup do - @site = Sources::Site.new("http://www.pixiv.net/member_illust.php?mode=medium&illust_id=49270482") - @site.get + @site = Sources::Strategies.find("http://www.pixiv.net/member_illust.php?mode=medium&illust_id=49270482") @image_urls = @site.image_urls end @@ -46,8 +49,7 @@ module Sources context "An ugoira source site for pixiv" do setup do - @site = Sources::Site.new("http://www.pixiv.net/member_illust.php?mode=medium&illust_id=62247364") - @site.get + @site = Sources::Strategies.find("http://www.pixiv.net/member_illust.php?mode=medium&illust_id=62247364") end should "get the file url" do @@ -66,8 +68,7 @@ module Sources context "A https://i.pximg.net/img-zip/ugoira/* source" do should "get the metadata" do - @site = Sources::Site.new("https://i.pximg.net/img-zip-ugoira/img/2017/04/04/08/57/38/62247364_ugoira1920x1080.zip") - @site.get + @site = Sources::Strategies.find("https://i.pximg.net/img-zip-ugoira/img/2017/04/04/08/57/38/62247364_ugoira1920x1080.zip") assert_equal("uroobnad2", @site.artist_name) end @@ -79,7 +80,7 @@ module Sources end should "get the profile" do - assert_equal("http://www.pixiv.net/member.php?id=696859", @site.profile_url) + assert_equal("https://www.pixiv.net/member.php?id=696859", @site.profile_url) end should "get the artist name" do @@ -142,12 +143,17 @@ module Sources should "get the full size image url" do assert_equal("https://i.pximg.net/img-original/img/2017/08/18/00/09/21/64476642_p0.jpg", @site.image_url) end + + should "get the full size image url for the canonical url" do + assert_equal("https://i.pximg.net/img-original/img/2017/08/18/00/09/21/64476642_p0.jpg", @site.canonical_url) + end end context "fetching source data for a deleted work" do should "raise a bad id error" do assert_raise(::PixivApiClient::BadIDError) do get_source("https://i.pximg.net/img-original/img/2017/11/22/01/06/44/65991677_p0.png") + @site.image_urls end end end diff --git a/test/unit/sources/tumblr_test.rb b/test/unit/sources/tumblr_test.rb index 4bf87d9ef..bfc02c94d 100644 --- a/test/unit/sources/tumblr_test.rb +++ b/test/unit/sources/tumblr_test.rb @@ -9,8 +9,7 @@ module Sources context "The source for a 'http://*.tumblr.com/post/*' photo post with a single image" do setup do - @site = Sources::Site.new("https://noizave.tumblr.com/post/162206271767") - @site.get + @site = Sources::Strategies.find("https://noizave.tumblr.com/post/162206271767") end should "get the artist name" do @@ -22,7 +21,7 @@ module Sources end should "get the tags" do - tags = [["tag", "https://tumblr.com/tagged/tag"], ["red_hair", "https://tumblr.com/tagged/red-hair"]] + tags = [["tag", "https://tumblr.com/tagged/tag"], ["red_hair", "https://tumblr.com/tagged/red_hair"]] assert_equal(tags, @site.tags) end @@ -68,7 +67,7 @@ module Sources end should "get the image url" do - assert_equal("http://data.tumblr.com/3bbfcbf075ddf969c996641b264086fd/tumblr_os2buiIOt51wsfqepo1_raw.png", @site.image_url) + assert_equal("https://media.tumblr.com/3bbfcbf075ddf969c996641b264086fd/tumblr_os2buiIOt51wsfqepo1_1280.png", @site.image_url) end should "get the artist" do @@ -82,16 +81,15 @@ module Sources context "The source for a 'http://*.tumblr.com/image/*' image page" do setup do - @site = Sources::Site.new("https://noizave.tumblr.com/image/162206271767") - @site.get + @site = Sources::Strategies.find("https://noizave.tumblr.com/image/162206271767") end should "get the image url" do - assert_equal("http://data.tumblr.com/3bbfcbf075ddf969c996641b264086fd/tumblr_os2buiIOt51wsfqepo1_raw.png", @site.image_url) + assert_equal("https://media.tumblr.com/3bbfcbf075ddf969c996641b264086fd/tumblr_os2buiIOt51wsfqepo1_1280.png", @site.image_url) end should "get the tags" do - tags = [["tag", "https://tumblr.com/tagged/tag"], ["red_hair", "https://tumblr.com/tagged/red-hair"]] + tags = [["tag", "https://tumblr.com/tagged/tag"], ["red_hair", "https://tumblr.com/tagged/red_hair"]] assert_equal(tags, @site.tags) end end @@ -100,20 +98,19 @@ module Sources setup do @url = "https://78.media.tumblr.com/7c4d2c6843466f92c3dd0516e749ec35/tumblr_orwwptNBCE1wsfqepo2_1280.jpg" @ref = "https://noizave.tumblr.com/post/162094447052" - @site = Sources::Site.new(@url, referer_url: @ref) - @site.get + @site = Sources::Strategies.find(@url, @ref) end should "get the image urls" do urls = %w[ - http://data.tumblr.com/afed9f5b3c33c39dc8c967e262955de2/tumblr_orwwptNBCE1wsfqepo1_raw.png - http://data.tumblr.com/7c4d2c6843466f92c3dd0516e749ec35/tumblr_orwwptNBCE1wsfqepo2_raw.jpg - http://data.tumblr.com/d2ed224f135b0c81f812df81a0a8692d/tumblr_orwwptNBCE1wsfqepo3_raw.gif - http://data.tumblr.com/3bbfcbf075ddf969c996641b264086fd/tumblr_inline_os3134mABB1v11u29_raw.png - http://data.tumblr.com/34ed9d0ff4a21625981372291cb53040/tumblr_nv3hwpsZQY1uft51jo1_raw.gif + https://media.tumblr.com/afed9f5b3c33c39dc8c967e262955de2/tumblr_orwwptNBCE1wsfqepo1_1280.png + https://media.tumblr.com/7c4d2c6843466f92c3dd0516e749ec35/tumblr_orwwptNBCE1wsfqepo2_1280.jpg + https://media.tumblr.com/d2ed224f135b0c81f812df81a0a8692d/tumblr_orwwptNBCE1wsfqepo3_1280.gif + https://media.tumblr.com/3bbfcbf075ddf969c996641b264086fd/tumblr_inline_os3134mABB1v11u29_1280.png + https://media.tumblr.com/34ed9d0ff4a21625981372291cb53040/tumblr_nv3hwpsZQY1uft51jo1_1280.gif ] - assert_equal(urls, @site.image_urls) + assert_equal(urls.sort, @site.image_urls.sort) end should "get the tags" do @@ -129,17 +126,16 @@ module Sources context "The source for a 'http://*.tumblr.com/post/*' text post with inline images" do setup do - @site = Sources::Site.new("https://noizave.tumblr.com/post/162221502947") - @site.get + @site = Sources::Strategies.find("https://noizave.tumblr.com/post/162221502947") end should "get the image urls" do urls = %w[ - http://data.tumblr.com/afed9f5b3c33c39dc8c967e262955de2/tumblr_inline_os2zhkfhY01v11u29_raw.png - http://data.tumblr.com/7c4d2c6843466f92c3dd0516e749ec35/tumblr_inline_os2zkg02xH1v11u29_raw.jpg + https://media.tumblr.com/afed9f5b3c33c39dc8c967e262955de2/tumblr_inline_os2zhkfhY01v11u29_1280.png + https://media.tumblr.com/7c4d2c6843466f92c3dd0516e749ec35/tumblr_inline_os2zkg02xH1v11u29_1280.jpg ] - assert_equal(urls, @site.image_urls) + assert_equal(urls.sort, @site.image_urls.sort) end should "get the commentary" do @@ -151,14 +147,13 @@ module Sources context "The source for a 'http://*.tumblr.com/post/*' video post with inline images" do setup do - @site = Sources::Site.new("https://noizave.tumblr.com/post/162222617101") - @site.get + @site = Sources::Strategies.find("https://noizave.tumblr.com/post/162222617101") end should "get the image urls" do urls = %w[ https://vtt.tumblr.com/tumblr_os31dkexhK1wsfqep.mp4 - http://data.tumblr.com/afed9f5b3c33c39dc8c967e262955de2/tumblr_inline_os31dclyCR1v11u29_raw.png + https://media.tumblr.com/afed9f5b3c33c39dc8c967e262955de2/tumblr_inline_os31dclyCR1v11u29_1280.png ] assert_equal(urls, @site.image_urls) @@ -167,12 +162,11 @@ module Sources context "The source for a 'http://*.tumblr.com/post/*' answer post with inline images" do setup do - @site = Sources::Site.new("https://noizave.tumblr.com/post/171237880542/test-ask") - @site.get + @site = Sources::Strategies.find("https://noizave.tumblr.com/post/171237880542/test-ask") end should "get the image urls" do - urls = ["http://data.tumblr.com/cb481f031010e8ddad564b2150149c9a/tumblr_inline_p4nxoyLrSh1v11u29_raw.png"] + urls = ["https://media.tumblr.com/cb481f031010e8ddad564b2150149c9a/tumblr_inline_p4nxoyLrSh1v11u29_1280.png"] assert_equal(urls, @site.image_urls) end diff --git a/test/unit/sources/twitter_test.rb b/test/unit/sources/twitter_test.rb index 2687e4a78..08068646d 100644 --- a/test/unit/sources/twitter_test.rb +++ b/test/unit/sources/twitter_test.rb @@ -2,79 +2,16 @@ require 'test_helper' module Sources class TwitterTest < ActiveSupport::TestCase - context "A video" do - setup do - skip "Twitter key is not set" unless Danbooru.config.twitter_api_key - @site = Sources::Site.new("https://twitter.com/CincinnatiZoo/status/859073537713328129") - @site.get - end - - should "get the image url" do - assert_equal("https://video.twimg.com/ext_tw_video/859073467769126913/pu/vid/1280x720/cPGgVROXHy3yrK6u.mp4", @site.image_url) - end - end - - context "An animated gif" do - setup do - skip "Twitter key is not set" unless Danbooru.config.twitter_api_key - @site = Sources::Site.new("https://twitter.com/DaniStrawberry1/status/859435334765088769") - @site.get - end - - should "get the image url" do - assert_equal("https://video.twimg.com/tweet_video/C-1Tns7WsAAqvqn.mp4", @site.image_url) - end - end - - context "A twitter summary card" do - setup do - skip "Twitter key is not set" unless Danbooru.config.twitter_api_key - @site = Sources::Site.new("https://twitter.com/NatGeo/status/932700115936178177") - @site.get - end - - should "get the image url" do - assert_equal("https://pmdvod.nationalgeographic.com/NG_Video/205/302/smpost_1510342850295.jpg", @site.image_url) - end - end - - context "A twitter summary card from twitter" do - setup do - skip "Twitter key is not set" unless Danbooru.config.twitter_api_key - @site = Sources::Site.new("https://twitter.com/masayasuf/status/870734961778630656/photo/1") - @site.get - end - - should "get the image url" do - assert_equal("https://pbs.twimg.com/media/DBV40M2UIAAHYlt.jpg:orig", @site.image_url) - end - end - - context "A twitter summary card from twitter with a :large image" do - setup do - skip "Twitter key is not set" unless Danbooru.config.twitter_api_key - @site = Sources::Site.new("https://twitter.com/aranobu/status/817736083567820800") - @site.get - end - - should "get the image url" do - assert_equal("https://pbs.twimg.com/media/C1kt72yVEAEGpOv.jpg:orig", @site.image_url) - end - end - context "An extended tweet" do should "extract the correct image url" do skip "Twitter key is not set" unless Danbooru.config.twitter_api_key - @site = Sources::Site.new("https://twitter.com/onsen_musume_jp/status/865534101918330881") - @site.get - + @site = Sources::Strategies.find("https://twitter.com/onsen_musume_jp/status/865534101918330881") assert_equal(["https://pbs.twimg.com/media/DAL-ntWV0AEbhes.jpg:orig"], @site.image_urls) end should "extract all the image urls" do skip "Twitter key is not set" unless Danbooru.config.twitter_api_key - @site = Sources::Site.new("https://twitter.com/aoimanabu/status/892370963630743552") - @site.get + @site = Sources::Strategies.find("https://twitter.com/aoimanabu/status/892370963630743552") urls = %w[ https://pbs.twimg.com/media/DGJWp59UIAA_-en.jpg:orig @@ -85,12 +22,72 @@ module Sources assert_equal(urls, @site.image_urls) end end + + context "A video" do + setup do + skip "Twitter key is not set" unless Danbooru.config.twitter_api_key + @site = Sources::Strategies.find("https://twitter.com/CincinnatiZoo/status/859073537713328129") + end + + should "get the image url" do + assert_equal("https://video.twimg.com/ext_tw_video/859073467769126913/pu/vid/1280x720/cPGgVROXHy3yrK6u.mp4", @site.image_url) + end + end + + context "An animated gif" do + setup do + skip "Twitter key is not set" unless Danbooru.config.twitter_api_key + @site = Sources::Strategies.find("https://twitter.com/DaniStrawberry1/status/859435334765088769") + end + + should "get the image url" do + assert_equal("https://video.twimg.com/tweet_video/C-1Tns7WsAAqvqn.mp4", @site.image_url) + end + end + + context "A twitter summary card" do + setup do + skip "Twitter key is not set" unless Danbooru.config.twitter_api_key + @site = Sources::Strategies.find("https://twitter.com/NatGeo/status/932700115936178177") + end + + should "get the image url" do + assert_equal("https://pmdvod.nationalgeographic.com/NG_Video/205/302/smpost_1510342850295.jpg", @site.image_url) + end + end + + context "A twitter summary card from twitter" do + setup do + skip "Twitter key is not set" unless Danbooru.config.twitter_api_key + @site = Sources::Strategies.find("https://twitter.com/masayasuf/status/870734961778630656/photo/1") + end + + should "get the image url" do + skip "Find another url, the masayasuf tweet no longer exists" + assert_equal("https://pbs.twimg.com/media/DBV40M2UIAAHYlt.jpg:orig", @site.image_url) + end + end + + context "A twitter summary card from twitter with a :large image" do + setup do + skip "Twitter key is not set" unless Danbooru.config.twitter_api_key + @site = Sources::Strategies.find("https://twitter.com/aranobu/status/817736083567820800") + end + + should "get the image url" do + assert_equal("https://pbs.twimg.com/media/C1kt72yVEAEGpOv.jpg:orig", @site.image_url) + end + + should "get the canonical url" do + assert_equal("https://twitter.com/aranobu/status/817736083567820800", @site.canonical_url) + end + end context "The source site for a restricted twitter" do setup do skip "Twitter key is not set" unless Danbooru.config.twitter_api_key - @site = Sources::Site.new("https://mobile.twitter.com/Strangestone/status/556440271961858051") - @site.get + @site = Sources::Strategies.find("https://mobile.twitter.com/Strangestone/status/556440271961858051") + end should "get the image url" do @@ -101,8 +98,7 @@ module Sources context "The source site for twitter" do setup do skip "Twitter key is not set" unless Danbooru.config.twitter_api_key - @site = Sources::Site.new("https://mobile.twitter.com/nounproject/status/540944400767922176") - @site.get + @site = Sources::Strategies.find("https://mobile.twitter.com/nounproject/status/540944400767922176") end should "get the profile" do @@ -135,8 +131,7 @@ module Sources context "The source site for a direct image and a referer" do setup do skip "Twitter key is not set" unless Danbooru.config.twitter_api_key - @site = Sources::Site.new("https://pbs.twimg.com/media/B4HSEP5CUAA4xyu.png:large", referer_url: "https://twitter.com/nounproject/status/540944400767922176") - @site.get + @site = Sources::Strategies.find("https://pbs.twimg.com/media/B4HSEP5CUAA4xyu.png:large", "https://twitter.com/nounproject/status/540944400767922176") end should "get the artist name" do @@ -151,8 +146,7 @@ module Sources context "The source site for a https://twitter.com/i/web/status/:id url" do setup do skip "Twitter key is not set" unless Danbooru.config.twitter_api_key - @site = Sources::Site.new("https://twitter.com/i/web/status/943446161586733056") - @site.get + @site = Sources::Strategies.find("https://twitter.com/i/web/status/943446161586733056") end should "fetch the source data" do @@ -163,8 +157,7 @@ module Sources context "A tweet" do setup do skip "Twitter key is not set" unless Danbooru.config.twitter_api_key - @site = Sources::Site.new("https://twitter.com/noizave/status/875768175136317440") - @site.get + @site = Sources::Strategies.find("https://twitter.com/noizave/status/875768175136317440") end should "convert urls, hashtags, and mentions to dtext" do diff --git a/test/unit/tag_alias_correction_test.rb b/test/unit/tag_alias_correction_test.rb index b467dbe33..91cafdeaa 100644 --- a/test/unit/tag_alias_correction_test.rb +++ b/test/unit/tag_alias_correction_test.rb @@ -17,6 +17,7 @@ class TagAliasCorrectionTest < ActiveSupport::TestCase context "with a bad cache and post counts" do setup do + Cache.delete("ta:#{Cache.hash('bbb')}") Cache.put("ta:#{Cache.hash('aaa')}", "zzz") Tag.where(:name => "aaa").update_all("post_count = -3") @correction = TagAliasCorrection.new(@tag_alias.id)