mirror of
https://github.com/e621ng/dtext_rb.git
synced 2025-03-04 03:03:03 -05:00
More efficient full test script
Turns out the previous version was slow af. From 2m4s to 31s for reading/parsing the whole reference file.
This commit is contained in:
parent
c6cd30d7bc
commit
b3ef8b8f1c
4
.gitignore
vendored
4
.gitignore
vendored
@ -5,5 +5,5 @@ pkg/
|
||||
*.so
|
||||
*~
|
||||
differences.yml
|
||||
dtext_reference.csv
|
||||
dtext.csv
|
||||
dtext_reference.json.gz
|
||||
dtext.json.gz
|
||||
|
18
test/reference.sql
Normal file
18
test/reference.sql
Normal file
@ -0,0 +1,18 @@
|
||||
SET STATEMENT_TIMEOUT = 0;
|
||||
@export {"type": "json", "processor": { "printTableName": false } }
|
||||
|
||||
SELECT 'wp' || id as id, body FROM wiki_pages
|
||||
UNION ALL SELECT 'pf' || id, reason FROM post_flags
|
||||
UNION ALL SELECT 'bl' || id, body FROM blips
|
||||
UNION ALL SELECT 'cm' || id, body FROM comments
|
||||
UNION ALL SELECT 'fp' || id, body FROM forum_posts
|
||||
UNION ALL SELECT 'uf' || id, body FROM user_feedback
|
||||
UNION ALL SELECT 'no' || id, body FROM notes
|
||||
UNION ALL SELECT 'po' || id, description FROM pools
|
||||
UNION ALL SELECT 'ps' || id, description FROM post_sets
|
||||
UNION ALL SELECT 'ua' || id, profile_about FROM users WHERE profile_about IS NOT NULL AND profile_about != ''
|
||||
UNION ALL SELECT 'ui' || id, profile_artinfo FROM users WHERE profile_artinfo IS NOT NULL AND profile_artinfo != ''
|
||||
UNION ALL SELECT 'pd' || id, description FROM posts WHERE description IS NOT NULL AND description != '';
|
||||
|
||||
-- tr -d '\0-\10\13\14\16-\37' < export.json > dtext.json
|
||||
-- jq -c '.[]' dtext.json | gzip > dtext.json.gz
|
@ -1,20 +1,19 @@
|
||||
require "csv"
|
||||
require "dtext/dtext"
|
||||
require "json"
|
||||
require "yaml"
|
||||
require "zlib"
|
||||
|
||||
differences = []
|
||||
CSV.open("dtext_reference.csv", "r").each do |row|
|
||||
input = row[0]
|
||||
color_expected = row[1]
|
||||
no_color_expected = row[2]
|
||||
|
||||
color = DText.parse(input, allow_color: true)[0]
|
||||
no_color = DText.parse(input, allow_color: false)[0]
|
||||
if color != color_expected
|
||||
differences << [input, color_expected, color]
|
||||
end
|
||||
if no_color != no_color_expected
|
||||
differences << [input, no_color_expected, no_color]
|
||||
Zlib::GzipReader.open("dtext_reference.json.gz") do |file|
|
||||
file.each_line.with_index do |line, i|
|
||||
puts i if i % 10_000 == 0
|
||||
json = JSON.parse(line)
|
||||
|
||||
dtext = DText.parse(json["i"], allow_color: false)[0]
|
||||
if dtext != json["o"]
|
||||
differences << [json["id"], json["i"], json["o"], dtext]
|
||||
end
|
||||
end
|
||||
end
|
||||
|
||||
|
@ -1,11 +1,15 @@
|
||||
require "csv"
|
||||
require "dtext/dtext"
|
||||
require "json"
|
||||
require "zlib"
|
||||
|
||||
CSV.open("dtext_reference.csv", "w") do |result|
|
||||
CSV.foreach("dtext.csv") do |row|
|
||||
input = row.first
|
||||
no_color = DText.parse(input, allow_color: false)[0]
|
||||
color = DText.parse(input, allow_color: true)[0]
|
||||
result << [input, color, no_color]
|
||||
Zlib::GzipWriter.open("dtext_reference.json.gz") do |output|
|
||||
Zlib::GzipReader.open("dtext.json.gz") do |file|
|
||||
file.each_line.with_index do |line, i|
|
||||
puts i if i % 10_000 == 0
|
||||
json = JSON.parse(line)
|
||||
|
||||
dtext = DText.parse(json["body"], allow_color: false)[0]
|
||||
output.puts({ id: json["id"], i: json["body"], o: dtext }.to_json + "\n")
|
||||
end
|
||||
end
|
||||
end
|
||||
|
Loading…
Reference in New Issue
Block a user