Switch to OpenSearch (#550)

Run the following commands to import data into OpenSearch:

Post.document_store.create_index!
PostVersion.document_store.create_index!
Post.document_store.import
PostVersion.document_store.import

* Add opensearch client

* Add url param to force use opensearch

* Switch import method over to opensearch

* Index to opensearch as well

* Add option to gradually roll out os

* Index os with separate queue

* Move os post creation to job as well

* Exclusively use the OpenSearch client

* Stop enqueuing OsIndexUpdateJob

* Remove remaining elasticsearch code

Bump faraday since v2 is no longer blocked
This commit is contained in:
Earlopain 2023-10-02 18:57:07 +02:00 committed by GitHub
parent 156276f0c0
commit 3a58ac0938
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
10 changed files with 41 additions and 64 deletions

View File

@ -29,7 +29,7 @@ gem 'sidekiq-unique-jobs'
gem 'redis'
gem 'request_store'
gem 'elasticsearch'
gem 'opensearch-ruby'
gem 'mailgun-ruby'
gem 'resolv'

View File

@ -86,6 +86,7 @@ GEM
public_suffix (>= 2.0.2, < 6.0)
ast (2.4.2)
backport (1.2.0)
base64 (0.1.1)
bcrypt (3.1.19)
benchmark (0.2.1)
better_html (2.0.2)
@ -125,43 +126,17 @@ GEM
request_store (>= 1.0)
ruby2_keywords
e2mmap (0.1.0)
elasticsearch (7.17.7)
elasticsearch-api (= 7.17.7)
elasticsearch-transport (= 7.17.7)
elasticsearch-api (7.17.7)
multi_json
elasticsearch-transport (7.17.7)
faraday (~> 1)
multi_json
erubi (1.12.0)
factory_bot (6.2.1)
activesupport (>= 5.0.0)
factory_bot_rails (6.2.0)
factory_bot (~> 6.2.0)
railties (>= 5.0.0)
faraday (1.10.3)
faraday-em_http (~> 1.0)
faraday-em_synchrony (~> 1.0)
faraday-excon (~> 1.1)
faraday-httpclient (~> 1.0)
faraday-multipart (~> 1.0)
faraday-net_http (~> 1.0)
faraday-net_http_persistent (~> 1.0)
faraday-patron (~> 1.0)
faraday-rack (~> 1.0)
faraday-retry (~> 1.0)
faraday (2.7.11)
base64
faraday-net_http (>= 2.0, < 3.1)
ruby2_keywords (>= 0.0.4)
faraday-em_http (1.0.0)
faraday-em_synchrony (1.0.0)
faraday-excon (1.1.0)
faraday-httpclient (1.0.1)
faraday-multipart (1.0.4)
multipart-post (~> 2)
faraday-net_http (1.0.1)
faraday-net_http_persistent (1.2.0)
faraday-patron (1.0.0)
faraday-rack (1.0.0)
faraday-retry (1.0.3)
faraday-net_http (3.0.2)
ffi (1.15.5)
get_process_mem (0.2.7)
ffi (~> 1.0)
@ -212,7 +187,6 @@ GEM
msgpack (1.7.2)
multi_json (1.15.0)
multi_xml (0.6.0)
multipart-post (2.3.0)
net-imap (0.3.7)
date
net-protocol
@ -228,6 +202,9 @@ GEM
nokogiri (1.15.3)
mini_portile2 (~> 2.8.2)
racc (~> 1.4)
opensearch-ruby (3.0.1)
faraday (>= 1.0, < 3)
multi_json (>= 1.0)
parallel (1.23.0)
parser (3.2.2.3)
ast (~> 2.4.1)
@ -401,7 +378,6 @@ DEPENDENCIES
dotenv-rails
draper
dtext_rb!
elasticsearch
factory_bot_rails
httparty
listen
@ -410,6 +386,7 @@ DEPENDENCIES
memoist
mocha
newrelic_rpm
opensearch-ruby
pg
puma
rails (~> 7.0)

View File

@ -58,9 +58,9 @@ The postgres server accepts outside connections which you can use to access it w
Installation follows the same steps as the docker compose file. Ubuntu 20.04 is the current installation target.
There is no script that performs these steps for you, as you need to split them up to match your infrastructure.
Running a single machine install in production is possible, but is likely to be somewhat sluggish due to contention for disk between postgresql and elasticsearch.
Running a single machine install in production is possible, but is likely to be somewhat sluggish due to contention for disk between postgresql and opensearch.
Minimum RAM is 4GB. You will need to adjust values in config files to match how much RAM is available.
If you are targeting more than a hundred thousand posts and reasonable user volumes, you probably want to procure yourself a database server. See tuning guides for postgresql and elasticsearch for help planning these requirements.
If you are targeting more than a hundred thousand posts and reasonable user volumes, you probably want to procure yourself a database server. See tuning guides for postgresql and opensearch for help planning these requirements.
### Production Troubleshooting

View File

@ -27,6 +27,6 @@ module DocumentStore
end
def self.client
@client ||= Elasticsearch::Client.new(host: Danbooru.config.elasticsearch_host)
@client ||= OpenSearch::Client.new(host: Danbooru.config.opensearch_host)
end
end

View File

@ -18,7 +18,7 @@ FileUtils.chdir APP_ROOT do
FileUtils.cp 'docker/danbooru_local_config.rb', 'config/danbooru_local_config.rb'
end
puts "== Creating elasticsearch indices ==\n"
puts "== Creating opensearch indices ==\n"
system! "RAILS_ENV=development bin/rails runner '[Post, PostVersion].each { |model| model.document_store.create_index! }'"
puts "\n== Preparing database =="

View File

@ -627,8 +627,7 @@ module Danbooru
def iqdb_server
end
def elasticsearch_host
'127.0.0.1'
def opensearch_host
end
# Use a recaptcha on the signup page to protect against spambots creating new accounts.

View File

@ -3,7 +3,7 @@ version: "3"
x-environment: &common-env
DANBOORU_HOSTNAME: http://localhost:${EXPOSED_SERVER_PORT:-3000}
DANBOORU_REDIS_URL: redis://redis
DANBOORU_ELASTICSEARCH_HOST: elastic
DANBOORU_OPENSEARCH_HOST: opensearch
DANBOORU_MEMCACHED_SERVERS: memcached
DANBOORU_IQDB_SERVER: http://iqdb:5588
DANBOORU_DISCORD_SITE: http://localhost:8000
@ -15,7 +15,7 @@ x-environment: &common-env
SESSION_SECRET_KEY: 44b4f44e9f253c406cbe727d403d500c1cecff943e4d2aea8f5447f28846fffe
x-depends-on: &common-depends-on
elastic:
opensearch:
condition: service_healthy
memcached:
condition: service_started
@ -97,19 +97,20 @@ services:
memcached:
image: memcached:1.5.22-alpine
elastic:
image: elasticsearch:7.14.2
opensearch:
image: opensearchproject/opensearch:2.9.0
environment:
- discovery.type=single-node
- xpack.security.enabled=false
- logger.level=WARN
- ES_JAVA_OPTS=-Xms1g -Xmx1g
- DISABLE_SECURITY_PLUGIN=true
- DISABLE_INSTALL_DEMO_CONFIG=true
- OPENSEARCH_JAVA_OPTS=-Xms1g -Xmx1g
volumes:
- elastic_data:/usr/share/elasticsearch/data
- opensearch_data:/usr/share/opensearch/data
healthcheck:
interval: 10s
timeout: 2s
test: nc -z elastic 9200
test: curl "opensearch:9200/_cluster/health?wait_for_status=yellow&timeout=2s"
iqdb:
image: ghcr.io/e621ng/iqdb:d4fed9d9a51184e72d2f14d4ec461d7830bd177a
@ -181,7 +182,7 @@ services:
volumes:
post_data:
iqdb_data:
elastic_data:
opensearch_data:
db_data:
redis_data_v2:
node_modules:

View File

@ -24,7 +24,7 @@ Shoulda::Matchers.configure do |config|
end
WebMock.disable_net_connect!(allow: [
Danbooru.config.elasticsearch_host,
Danbooru.config.opensearch_host,
])
FactoryBot::SyntaxRunner.class_eval do
@ -37,7 +37,7 @@ end
BCrypt::Engine.send(:remove_const, :DEFAULT_COST)
BCrypt::Engine::DEFAULT_COST = BCrypt::Engine::MIN_COST
# Clear the elastic indices completely
# Clear the opensearch indices completely
Post.document_store.create_index!(delete_existing: true)
PostVersion.document_store.create_index!(delete_existing: true)

View File

@ -8,34 +8,34 @@ module DocumentStore
end
teardown do
WebMock.disable_net_connect!(allow: [Danbooru.config.elasticsearch_host])
WebMock.disable_net_connect!(allow: [Danbooru.config.opensearch_host])
end
def stub_elastic(method, path)
stub_request(method, "http://#{Danbooru.config.elasticsearch_host}:9200#{path}")
def stub_opensearch(method, path)
stub_request(method, "http://#{Danbooru.config.opensearch_host}:9200#{path}")
end
test "it deletes the index" do
delete_request = stub_elastic(:delete, "/posts_test")
delete_request = stub_opensearch(:delete, "/posts_test")
Post.document_store.delete_index!
assert_requested delete_request
end
test "it checks for the existance of the index" do
head_request = stub_elastic(:head, "/posts_test")
head_request = stub_opensearch(:head, "/posts_test")
Post.document_store.index_exist?
assert_requested head_request
end
test "it skips creating the index if it already exists" do
head_request = stub_elastic(:head, "/posts_test").to_return(status: 200)
head_request = stub_opensearch(:head, "/posts_test").to_return(status: 200)
Post.document_store.create_index!
assert_requested head_request
end
test "it creates the index if it doesn't exist" do
head_request = stub_elastic(:head, "/posts_test").to_return(status: 404)
put_request = stub_elastic(:put, "/posts_test").with(body: Post.document_store.index)
head_request = stub_opensearch(:head, "/posts_test").to_return(status: 404)
put_request = stub_opensearch(:put, "/posts_test").with(body: Post.document_store.index)
assert(Post.document_store.index.present?)
Post.document_store.create_index!
@ -45,9 +45,9 @@ module DocumentStore
end
test "it recreates the index if delete_existing is true and the index already exists" do
head_request = stub_elastic(:head, "/posts_test").to_return(status: 200)
delete_request = stub_elastic(:delete, "/posts_test")
put_request = stub_elastic(:put, "/posts_test")
head_request = stub_opensearch(:head, "/posts_test").to_return(status: 200)
delete_request = stub_opensearch(:delete, "/posts_test")
put_request = stub_opensearch(:put, "/posts_test")
Post.document_store.create_index!(delete_existing: true)
@ -57,13 +57,13 @@ module DocumentStore
end
test "it deletes by query" do
post_request = stub_elastic(:post, "/posts_test/_delete_by_query?q=*").with(body: "{}")
post_request = stub_opensearch(:post, "/posts_test/_delete_by_query?q=*").with(body: "{}")
Post.document_store.delete_by_query(query: "*", body: {})
assert_requested(post_request)
end
test "it refreshes the index" do
post_request = stub_elastic(:post, "/posts_test/_refresh")
post_request = stub_opensearch(:post, "/posts_test/_refresh")
Post.document_store.refresh_index!
assert_requested(post_request)
end

View File

@ -8,7 +8,7 @@ class PaginatorTest < ActiveSupport::TestCase
assert_equal(is_last_page, records.is_last_page?, "is_last_page")
end
{ active_record: Blip, elasticsearch: Post }.each do |name, model| # rubocop:disable Metrics/BlockLength
{ active_record: Blip, opensearch: Post }.each do |name, model| # rubocop:disable Metrics/BlockLength
context name do
context "sequential pagination (before)" do
should "return the correct set of records" do