1
0
mirror of https://github.com/e621ng/dtext_rb.git synced 2025-03-04 03:03:03 -05:00

More efficient full test script

It turns out the previous version was very slow.
This change cuts reading/parsing the whole reference file from 2m4s to 31s.
This commit is contained in:
Earlopain 2023-03-31 22:36:07 +02:00
parent c6cd30d7bc
commit b3ef8b8f1c
No known key found for this signature in database
GPG Key ID: 6CFB948E15246897
4 changed files with 42 additions and 21 deletions

4
.gitignore vendored
View File

@ -5,5 +5,5 @@ pkg/
*.so
*~
differences.yml
dtext_reference.csv
dtext.csv
dtext_reference.json.gz
dtext.json.gz

18
test/reference.sql Normal file
View File

@ -0,0 +1,18 @@
-- Exports every piece of user-written text as (id, body) rows so it can be
-- turned into dtext.json.gz (see the shell steps at the bottom of this file).
--
-- A statement timeout of 0 disables the timeout (PostgreSQL semantics), so the
-- long-running export is not cancelled part-way through.
SET STATEMENT_TIMEOUT = 0;
-- NOTE(review): "@export" is not SQL; it looks like a DBeaver client-side
-- command that writes the next query's result as JSON -- confirm which client
-- this script is meant to be run from.
@export {"type": "json", "processor": { "printTableName": false } }
-- Each id is prefixed with a two-letter tag naming the source table/column,
-- which keeps rows from different tables distinguishable in the combined
-- output. Only the first SELECT names its columns (id, body); the UNION ALL
-- branches reuse those names.
SELECT 'wp' || id as id, body FROM wiki_pages
UNION ALL SELECT 'pf' || id, reason FROM post_flags
UNION ALL SELECT 'bl' || id, body FROM blips
UNION ALL SELECT 'cm' || id, body FROM comments
UNION ALL SELECT 'fp' || id, body FROM forum_posts
UNION ALL SELECT 'uf' || id, body FROM user_feedback
UNION ALL SELECT 'no' || id, body FROM notes
UNION ALL SELECT 'po' || id, description FROM pools
UNION ALL SELECT 'ps' || id, description FROM post_sets
-- Only the users and posts branches skip NULL/empty text; the branches above
-- export every row unfiltered.
UNION ALL SELECT 'ua' || id, profile_about FROM users WHERE profile_about IS NOT NULL AND profile_about != ''
UNION ALL SELECT 'ui' || id, profile_artinfo FROM users WHERE profile_artinfo IS NOT NULL AND profile_artinfo != ''
UNION ALL SELECT 'pd' || id, description FROM posts WHERE description IS NOT NULL AND description != '';
-- Post-processing, run in a shell on the exported file:
--   1. tr deletes the control bytes with octal codes 0-10, 13, 14 and 16-37,
--      i.e. everything below space except tab (11), LF (12) and CR (15).
--   2. jq -c '.[]' prints each element of the top-level JSON array as one
--      compact line, which is then gzip-compressed.
-- tr -d '\0-\10\13\14\16-\37' < export.json > dtext.json
-- jq -c '.[]' dtext.json | gzip > dtext.json.gz

View File

@ -1,20 +1,19 @@
require "csv"
require "dtext/dtext"
require "json"
require "yaml"
require "zlib"
# NOTE(review): this listing appears to be a diff hunk rendered without +/-
# markers, so two versions of the comparison loop are interleaved and the
# do/if/end nesting does not balance as shown -- confirm against the real file.
#
# Collects one entry for every input whose freshly parsed output differs from
# the stored reference output.
differences = []
# CSV-based variant: each row holds the input, the expected output with
# allow_color: true, and the expected output with allow_color: false.
CSV.open("dtext_reference.csv", "r").each do |row|
input = row[0]
color_expected = row[1]
no_color_expected = row[2]
# Element 0 of DText.parse's return value is what gets compared.
color = DText.parse(input, allow_color: true)[0]
no_color = DText.parse(input, allow_color: false)[0]
if color != color_expected
differences << [input, color_expected, color]
end
if no_color != no_color_expected
differences << [input, no_color_expected, no_color]
# Gzip/JSON variant: reads the compressed reference file one line at a time;
# each line is a JSON object with "id", "i" (input) and "o" (expected output).
Zlib::GzipReader.open("dtext_reference.json.gz") do |file|
file.each_line.with_index do |line, i|
# Progress indicator: print the line index every 10,000 lines.
puts i if i % 10_000 == 0
json = JSON.parse(line)
# Only the allow_color: false rendering is checked in this variant.
dtext = DText.parse(json["i"], allow_color: false)[0]
if dtext != json["o"]
# Recorded as [id, input, expected output, actual output].
differences << [json["id"], json["i"], json["o"], dtext]
end
end
end

View File

@ -1,11 +1,15 @@
require "csv"
require "dtext/dtext"
require "json"
require "zlib"
# NOTE(review): this listing appears to be a diff hunk rendered without +/-
# markers, so two versions of the generator are interleaved and the do/end
# nesting does not balance as shown -- confirm against the real file.
#
# CSV-based variant: for every row of dtext.csv, writes the input together
# with its allow_color: true and allow_color: false renderings to
# dtext_reference.csv.
CSV.open("dtext_reference.csv", "w") do |result|
CSV.foreach("dtext.csv") do |row|
input = row.first
no_color = DText.parse(input, allow_color: false)[0]
color = DText.parse(input, allow_color: true)[0]
result << [input, color, no_color]
# Gzip/JSON variant: streams dtext.json.gz line by line and writes one JSON
# object per line to dtext_reference.json.gz.
Zlib::GzipWriter.open("dtext_reference.json.gz") do |output|
Zlib::GzipReader.open("dtext.json.gz") do |file|
file.each_line.with_index do |line, i|
# Progress indicator: print the line index every 10,000 lines.
puts i if i % 10_000 == 0
json = JSON.parse(line)
# Only the allow_color: false rendering is stored in this variant.
dtext = DText.parse(json["body"], allow_color: false)[0]
# Output keys: id (copied through), i (the input body), o (parser output).
# NOTE(review): puts already terminates the line and does not add a second
# newline when the string ends with one, so the explicit + "\n" looks
# redundant -- confirm before removing.
output.puts({ id: json["id"], i: json["body"], o: dtext }.to_json + "\n")
end
end
end