Add storage managers (local, sftp, s3, hybrid).

This commit is contained in:
evazion 2018-03-14 16:57:29 -05:00
parent 8a012d4c91
commit b0c7d9c185
8 changed files with 332 additions and 0 deletions

View File

@ -0,0 +1,106 @@
# Abstract base class for post file storage backends (Local, SFTP, S3, Hybrid,
# Null). Subclasses implement #store, #delete, and #open; this class supplies
# the shared naming scheme for file paths and public URLs across the three
# file types: :original, :large (sample), and :preview.
class StorageManager
  class Error < StandardError; end

  # NOTE(review): evaluated at class-load time, so these capture the route
  # host and Rails.root as configured at boot.
  DEFAULT_BASE_URL = Rails.application.routes.url_helpers.root_url + "data"
  DEFAULT_BASE_DIR = "#{Rails.root}/public/data"

  attr_reader :base_url, :base_dir, :hierarchical, :tagged_filenames, :large_image_prefix

  # base_url - public URL prefix files are served from (trailing "/" stripped).
  # base_dir - directory (local or remote) files are stored under.
  # hierarchical - when true, files are sharded into md5[0..1]/md5[2..3]/ subdirectories.
  # tagged_filenames - when true, humanized tag strings are embedded in URLs (SEO).
  # large_image_prefix - filename prefix used for :large sample images.
  def initialize(base_url: DEFAULT_BASE_URL, base_dir: DEFAULT_BASE_DIR, hierarchical: false, tagged_filenames: Danbooru.config.enable_seo_post_urls, large_image_prefix: Danbooru.config.large_image_prefix)
    @base_url = base_url.chomp("/")
    @base_dir = base_dir
    @hierarchical = hierarchical
    @tagged_filenames = tagged_filenames
    @large_image_prefix = large_image_prefix
  end

  # Store the given file at the given path. If a file already exists at that
  # location it should be overwritten atomically. Either the file is fully
  # written, or an error is raised and the original file is left unchanged. The
  # file should never be in a partially written state.
  def store(io, path)
    raise NotImplementedError, "store not implemented"
  end

  # Delete the file at the given path. If the file doesn't exist, no error
  # should be raised.
  def delete(path)
    raise NotImplementedError, "delete not implemented"
  end

  # Return a readonly copy of the file located at the given path.
  def open(path)
    raise NotImplementedError, "open not implemented"
  end

  # Store `io` as the post's file of the given type (:original, :large, :preview).
  def store_file(io, post, type)
    store(io, file_path(post.md5, post.file_ext, type))
  end

  # Delete a post's file of the given type. `post_id` is unused here but kept
  # in the signature so per-post dispatchers (StorageManager::Hybrid) can
  # route on it.
  def delete_file(post_id, md5, file_ext, type)
    delete(file_path(md5, file_ext, type))
  end

  # Return a readonly copy of the post's file of the given type.
  def open_file(post, type)
    open(file_path(post.md5, post.file_ext, type))
  end

  # Public URL for the post's file of the given type. Posts without a preview
  # get a static placeholder image instead.
  def file_url(post, type)
    subdir = subdir_for(post.md5)
    file = file_name(post.md5, post.file_ext, type)

    if type == :preview && !post.has_preview?
      "#{base_url}/images/download-preview.png"
    elsif type == :preview
      "#{base_url}/preview/#{subdir}#{file}"
    elsif type == :large && post.has_large?
      "#{base_url}/sample/#{subdir}#{seo_tags(post)}#{file}"
    else
      # :original, or :large when the post has no separate large version.
      "#{base_url}/#{subdir}#{seo_tags(post)}#{file}"
    end
  end

  protected

  # Absolute storage path for a file. Returns nil for unrecognized types.
  def file_path(md5, file_ext, type)
    subdir = subdir_for(md5)
    file = file_name(md5, file_ext, type)

    case type
    when :preview
      "#{base_dir}/preview/#{subdir}#{file}"
    when :large
      "#{base_dir}/sample/#{subdir}#{file}"
    when :original
      "#{base_dir}/#{subdir}#{file}"
    end
  end

  # Filename for a file of the given type. Previews are always jpg; large
  # samples are webm for zip (ugoira) posts and jpg otherwise.
  def file_name(md5, file_ext, type)
    large_file_ext = (file_ext == "zip") ? "webm" : "jpg"

    case type
    when :preview
      "#{md5}.jpg"
    when :large
      "#{large_image_prefix}#{md5}.#{large_file_ext}"
    when :original
      "#{md5}.#{file_ext}"
    end
  end

  # "ab/cd/" shard prefix derived from the md5, or "" when not hierarchical.
  def subdir_for(md5)
    if hierarchical
      "#{md5[0..1]}/#{md5[2..3]}/"
    else
      ""
    end
  end

  # SEO tag component ("__tag_string__") for file URLs, or "" when disabled
  # globally or by the viewing user's preference. Tags are collapsed to a
  # single-underscore-separated token string.
  def seo_tags(post, user = CurrentUser.user)
    return "" if !tagged_filenames || user.disable_tagged_filenames?

    tags = post.humanized_essential_tag_string.gsub(/[^a-z0-9]+/, "_").gsub(/(?:^_+)|(?:_+$)/, "").gsub(/_{2,}/, "_")
    "__#{tags}__"
  end
end

View File

@ -0,0 +1,23 @@
# Routes each storage operation to a backend chosen per file. The block given
# to #initialize receives (post_id, md5, file_ext, type) and returns the
# StorageManager instance to use for that file.
class StorageManager::Hybrid < StorageManager
  attr_reader :submanager

  def initialize(&block)
    @submanager = block
  end

  def store_file(io, post, type)
    submanager[post.id, post.md5, post.file_ext, type].store_file(io, post, type)
  end

  def delete_file(post_id, md5, file_ext, type)
    submanager[post_id, md5, file_ext, type].delete_file(post_id, md5, file_ext, type)
  end

  # Fixed: was declared as open_file(io, post, type); the extra `io` parameter
  # was unused and broke the StorageManager#open_file(post, type) interface
  # that callers (and the other subclasses) use.
  def open_file(post, type)
    submanager[post.id, post.md5, post.file_ext, type].open_file(post, type)
  end

  def file_url(post, type)
    submanager[post.id, post.md5, post.file_ext, type].file_url(post, type)
  end
end

View File

@ -0,0 +1,25 @@
# Stores files on the local filesystem under base_dir.
class StorageManager::Local < StorageManager
  DEFAULT_PERMISSIONS = 0644

  # Write `io` to dest_path atomically: copy to a uniquely-named staging file
  # in the same directory, then rename it into place (rename is atomic on
  # POSIX filesystems). On any failure the staging file is removed and the
  # original file, if any, is untouched.
  def store(io, dest_path)
    staging_path = "#{dest_path}-#{SecureRandom.uuid}.tmp"
    FileUtils.mkdir_p(File.dirname(staging_path))

    copied = IO.copy_stream(io, staging_path)
    unless copied == io.size
      raise Error, "store failed: #{copied}/#{io.size} bytes copied"
    end

    FileUtils.chmod(DEFAULT_PERMISSIONS, staging_path)
    File.rename(staging_path, dest_path)
  rescue StandardError => e
    # Never leave a partially-written staging file behind.
    FileUtils.rm_f(staging_path)
    raise Error, e
  end

  # Remove the file; rm_f is a no-op when it doesn't exist.
  def delete(path)
    FileUtils.rm_f(path)
  end

  # Return a read-only, binary-mode handle on the stored file.
  def open(path)
    File.open(path, "r", binmode: true)
  end
end

View File

@ -0,0 +1,13 @@
# A storage backend that silently discards everything. Useful as a stand-in
# when no real storage should be touched.
class StorageManager::Null < StorageManager
  # Accept and drop the upload.
  def store(_io, _path)
  end

  # Nothing is ever stored, so there is nothing to remove.
  def delete(_path)
  end

  # No file to return; callers receive nil.
  def open(_path)
  end
end

View File

@ -0,0 +1,43 @@
# Stores files in an Amazon S3 bucket. The bucket must already exist and be
# writable with the configured credentials.
class StorageManager::S3 < StorageManager
  # https://docs.aws.amazon.com/sdkforruby/api/Aws/S3/Client.html#initialize-instance_method
  DEFAULT_S3_OPTIONS = {
    region: Danbooru.config.aws_region,
    credentials: Danbooru.config.aws_credentials,
    logger: Rails.logger,
  }

  # https://docs.aws.amazon.com/sdkforruby/api/Aws/S3/Client.html#put_object-instance_method
  DEFAULT_PUT_OPTIONS = {
    acl: "public-read",
    storage_class: "STANDARD", # STANDARD, STANDARD_IA, REDUCED_REDUNDANCY
    cache_control: "public, max-age=#{1.year.to_i}",
    #content_type: "image/jpeg" # XXX should set content type
  }

  attr_reader :bucket, :client, :s3_options

  # bucket - name of an existing, writable S3 bucket.
  # client - an Aws::S3::Client; built from s3_options when not given.
  # s3_options - merged over DEFAULT_S3_OPTIONS for the constructed client.
  # Remaining options are forwarded to StorageManager#initialize.
  def initialize(bucket, client: nil, s3_options: {}, **options)
    @bucket = bucket
    @s3_options = DEFAULT_S3_OPTIONS.merge(s3_options)
    @client = client || Aws::S3::Client.new(**@s3_options)
    super(**options)
  end

  # Upload the file. put_object replaces any existing object at the key as a
  # single operation, and content_md5 lets S3 verify the body in transit.
  def store(io, path)
    data = io.read
    base64_md5 = Digest::MD5.base64digest(data)
    client.put_object(bucket: bucket, key: path, body: data, content_md5: base64_md5, **DEFAULT_PUT_OPTIONS)
  end

  def delete(path)
    client.delete_object(bucket: bucket, key: path)
  rescue Aws::S3::Errors::NoSuchKey
    # missing objects are fine, per the StorageManager#delete contract
  end

  # Download the object into a binary tempfile and return it.
  def open(path)
    file = Tempfile.new(binmode: true)
    # Fixed: was `bucket: bucket: key: path` — a syntax error (colon in place
    # of the comma between the two keyword arguments).
    client.get_object(bucket: bucket, key: path, response_target: file)
    file
  end
end

View File

@ -0,0 +1,76 @@
# Stores files on one or more remote hosts over SFTP. Writes and deletes are
# mirrored to every host; reads come from the first host only.
class StorageManager::SFTP < StorageManager
  DEFAULT_PERMISSIONS = 0644

  # http://net-ssh.github.io/net-ssh/Net/SSH.html#method-c-start
  DEFAULT_SSH_OPTIONS = {
    timeout: 10,
    logger: Rails.logger,
    verbose: :fatal,
    non_interactive: true,
  }

  attr_reader :hosts, :ssh_options

  # hosts - one or more hostnames to mirror files to.
  # ssh_options - merged over DEFAULT_SSH_OPTIONS; remaining options are
  # forwarded to StorageManager#initialize.
  def initialize(*hosts, ssh_options: {}, **options)
    @hosts = hosts
    @ssh_options = DEFAULT_SSH_OPTIONS.merge(ssh_options)
    super(**options)
  end

  # Upload `file` to dest_path on every host, overwriting atomically: the data
  # is uploaded under a unique temp name, the existing file (if any) is moved
  # aside as a backup, then the temp file is renamed into place. On failure
  # the backup is restored; temp/backup files are always cleaned up.
  def store(file, dest_path)
    temp_upload_path = dest_path + "-" + SecureRandom.uuid + ".tmp"
    dest_backup_path = dest_path + "-" + SecureRandom.uuid + ".bak"

    each_host do |host, sftp|
      begin
        sftp.upload!(file.path, temp_upload_path)
        sftp.setstat!(temp_upload_path, permissions: DEFAULT_PERMISSIONS)

        # `rename!` can't overwrite existing files, so if a file already exists
        # at dest_path we move it out of the way first.
        force { sftp.rename!(dest_path, dest_backup_path) }
        force { sftp.rename!(temp_upload_path, dest_path) }
      rescue StandardError => e
        # if anything fails, try to move the original file back in place (if it was moved).
        force { sftp.rename!(dest_backup_path, dest_path) }
        raise Error, e
      ensure
        # remove whichever of the temp/backup files still exist ("no such
        # file" is ignored via `force`).
        force { sftp.remove!(temp_upload_path) }
        force { sftp.remove!(dest_backup_path) }
      end
    end
  end

  # Remove dest_path from every host; missing files are ignored.
  def delete(dest_path)
    each_host do |host, sftp|
      force { sftp.remove!(dest_path) }
    end
  end

  # Download dest_path from the first host into a binary tempfile and return it.
  def open(dest_path)
    file = Tempfile.new(binmode: true)
    Net::SFTP.start(hosts.first, nil, ssh_options) do |sftp|
      sftp.download!(dest_path, file.path)
    end
    file
  end

  protected

  # Ignore "no such file" exceptions for the given operation.
  def force
    yield
  rescue Net::SFTP::StatusException => e
    raise Error, e unless e.description == "no such file"
  end

  # Open an SFTP session to each host in turn and yield (host, session).
  def each_host
    hosts.each do |host|
      Net::SFTP.start(host, nil, ssh_options) do |sftp|
        yield host, sftp
      end
    end
  end
end

View File

@ -222,6 +222,41 @@ module Danbooru
"danbooru"
end
# The method to use for storing image files. Returns the StorageManager
# instance used for all post file storage; exactly one of the examples
# below should be active at a time.
def storage_manager
  # Store files on the local filesystem.
  # base_dir - where to store files (default: under public/data)
  # base_url - where to serve files from (default: http://#{hostname}/data)
  # hierarchical: false - store files in a single directory
  # hierarchical: true - store files in a hierarchical directory structure, based on the MD5 hash
  StorageManager::Local.new(base_dir: "#{Rails.root}/public/data", hierarchical: false)

  # Store files on one or more remote host(s). Configure SSH settings in
  # ~/.ssh_config or in the ssh_options param (ref: http://net-ssh.github.io/net-ssh/Net/SSH.html#method-c-start)
  # StorageManager::SFTP.new("i1.example.com", "i2.example.com", base_dir: "/mnt/backup", hierarchical: false, ssh_options: {})

  # Store files in an S3 bucket. The bucket must already exist and be
  # writable by you. Configure your S3 settings in aws_region and
  # aws_credentials below, or in the s3_options param (ref:
  # https://docs.aws.amazon.com/sdkforruby/api/Aws/S3/Client.html#initialize-instance_method)
  # StorageManager::S3.new("my_s3_bucket", base_url: "https://my_s3_bucket.s3.amazonaws.com/", s3_options: {})

  # Select the storage method based on the post's id and type (preview, large, or original).
  # StorageManager::Hybrid.new do |id, md5, file_ext, type|
  #   ssh_options = { user: "danbooru" }
  #
  #   if type.in?([:large, :original]) && id.in?(0..850_000)
  #     StorageManager::SFTP.new("raikou1.donmai.us", base_url: "https://raikou1.donmai.us", base_dir: "/path/to/files", hierarchical: true, ssh_options: ssh_options)
  #   elsif type.in?([:large, :original]) && id.in?(850_001..2_000_000)
  #     StorageManager::SFTP.new("raikou2.donmai.us", base_url: "https://raikou2.donmai.us", base_dir: "/path/to/files", hierarchical: true, ssh_options: ssh_options)
  #   elsif type.in?([:large, :original]) && id.in?(2_000_001..3_000_000)
  #     StorageManager::SFTP.new(*all_server_hosts, base_url: "https://hijiribe.donmai.us/data", ssh_options: ssh_options)
  #   else
  #     StorageManager::SFTP.new(*all_server_hosts, ssh_options: ssh_options)
  #   end
  # end
end
# Legacy URL builder for a post's original file under /data, using the
# post's precomputed path prefix.
def build_file_url(post)
  format("/data/%s/%s.%s", post.file_path_prefix, post.md5, post.file_ext)
end
@ -611,6 +646,14 @@ module Danbooru
end
# AWS config options
# Region for AWS API clients (consumed by StorageManager::S3's
# DEFAULT_S3_OPTIONS via Danbooru.config.aws_region).
def aws_region
  "us-east-1"
end
# Credentials object for the AWS SDK, built from the configured key pair
# (see aws_access_key_id / aws_secret_access_key below).
def aws_credentials
  access_key = Danbooru.config.aws_access_key_id
  secret_key = Danbooru.config.aws_secret_access_key
  Aws::Credentials.new(access_key, secret_key)
end
# AWS access key ID; nil by default — override in a local config subclass.
def aws_access_key_id
end

View File

@ -38,6 +38,9 @@ class ActiveSupport::TestCase
mock_missed_search_service!
WebMock.allow_net_connect!
Danbooru.config.stubs(:enable_sock_puppet_validation?).returns(false)
storage_manager = StorageManager::Local.new(base_dir: "#{Rails.root}/public/data/test")
Danbooru.config.stubs(:storage_manager).returns(storage_manager)
end
teardown do