first draft of the new data exporter

This commit is contained in:
Georg Gadinger 2022-12-07 23:20:45 -08:00
parent 5e3ee6b93c
commit 4229c7f579
15 changed files with 355 additions and 224 deletions

View File

@ -128,3 +128,6 @@ Style/RescueStandardError:
Style/Encoding: Style/Encoding:
Enabled: false Enabled: false
Style/EndlessMethod:
EnforcedStyle: allow_always

View File

@ -111,3 +111,5 @@ gem "net-imap"
gem "net-pop" gem "net-pop"
gem "pundit", "~> 2.2" gem "pundit", "~> 2.2"
gem "rubyzip", "~> 2.3"

View File

@ -413,6 +413,7 @@ GEM
ruby-progressbar (1.11.0) ruby-progressbar (1.11.0)
ruby-vips (2.1.4) ruby-vips (2.1.4)
ffi (~> 1.12) ffi (~> 1.12)
rubyzip (2.3.2)
sanitize (6.0.0) sanitize (6.0.0)
crass (~> 1.0.2) crass (~> 1.0.2)
nokogiri (>= 1.12.0) nokogiri (>= 1.12.0)
@ -570,6 +571,7 @@ DEPENDENCIES
rubocop (~> 1.39) rubocop (~> 1.39)
rubocop-rails (~> 2.17) rubocop-rails (~> 2.17)
ruby-progressbar ruby-progressbar
rubyzip (~> 2.3)
sanitize sanitize
sassc-rails sassc-rails
sentry-rails sentry-rails

View File

@ -393,7 +393,7 @@ en:
title: "Export" title: "Export"
heading: "Export your data" heading: "Export your data"
body_html: | body_html: |
<p>The data is inside a <code>.tar.gz</code> archive and available in three formats: YAML, JSON, and XML. <p>The data is inside a <code>.zip</code> archive that contains some JSON files.
The archive also contains a copy of your profile picture and header picture in all sizes.</p> The archive also contains a copy of your profile picture and header picture in all sizes.</p>
<p>Please note that you can only export your data once a week. Exporting your data <p>Please note that you can only export your data once a week. Exporting your data
will take a while, so please be patient. You will receive a question once exporting will take a while, so please be patient. You will receive a question once exporting

View File

@ -1,260 +1,78 @@
# frozen_string_literal: true # frozen_string_literal: true
require "json" require "fileutils"
require "yaml" require "securerandom"
require "httparty" require "zip/filesystem"
require "use_case/data_export/answers"
require "use_case/data_export/appendables"
require "use_case/data_export/comments"
require "use_case/data_export/inbox_entries"
require "use_case/data_export/mute_rules"
require "use_case/data_export/questions"
require "use_case/data_export/relationships"
require "use_case/data_export/theme"
require "use_case/data_export/user"
# the justask data exporter, now with 200% less shelling out to system tools!
#
# the data export can be easily extended by subclassing `UseCase::DataExport::Base`
# and `require`ing it above
class Exporter class Exporter
EXPORT_ROLES = %i[administrator moderator].freeze
def initialize(user) def initialize(user)
@user = user @user = user
@obj = {}
@export_dirname = Dir.mktmpdir("rs-export-") @export_name = "export-#{@user.id}-#{SecureRandom.base36(32)}"
@export_filename = File.basename(@export_dirname) FileUtils.mkdir_p(Rails.public_path.join("export")) # ensure the public export path exists
export_zipfile_path = Rails.public_path.join("export", "#{@export_name}.zip")
@zipfile = Zip::File.open(export_zipfile_path, Zip::File::CREATE)
end end
def export def export
@user.export_processing = true @user.export_processing = true
@user.save validate: false @user.save validate: false
collect_user_info
collect_questions prepare_zipfile
collect_answers write_files
collect_comments
collect_smiles
finalize
publish publish
rescue => e rescue => e
Sentry.capture_exception(e) Sentry.capture_exception(e)
@user.export_processing = false @user.export_processing = false
@user.save validate: false @user.save validate: false
raise # so that e.g. the sidekiq job fails
ensure ensure
FileUtils.remove_dir(@export_dirname) @zipfile.close
end end
private private
def collect_user_info # creates some directories we want to exist and sets a nice comment
%i[answered_count asked_count comment_smiled_count commented_count def prepare_zipfile
confirmation_sent_at confirmed_at created_at profile_header profile_header_h profile_header_w profile_header_x profile_header_y @zipfile.mkdir(@export_name)
profile_picture_w profile_picture_h profile_picture_x profile_picture_y current_sign_in_at current_sign_in_ip @zipfile.mkdir("#{@export_name}/pictures")
id last_sign_in_at last_sign_in_ip locale
privacy_allow_anonymous_questions privacy_allow_public_timeline privacy_allow_stranger_answers @zipfile.comment = <<~COMMENT
privacy_show_in_search profile_header_file_name profile_picture_file_name #{APP_CONFIG.fetch(:site_name)} export done for #{@user.screen_name} on #{Time.now.utc.iso8601}
screen_name show_foreign_themes sign_in_count smiled_count updated_at].each do |f| COMMENT
@obj[f] = @user.send f
end end
@obj[:profile] = {} # writes the files to the zip file
%i[display_name motivation_header website location description].each do |f| def write_files
@obj[:profile][f] = @user.profile.send f UseCase::DataExport::Base.descendants.each do |export_klass|
end export_klass.call(user: @user).each do |file_name, contents|
@zipfile.file.open("#{@export_name}/#{file_name}", "wb".dup) do |file| # .dup because of %(can't modify frozen String: "wb")
EXPORT_ROLES.each do |role| file.write contents
@obj[role] = @user.has_role?(role)
end end
end end
def collect_questions
@obj[:questions] = []
@user.questions.each do |q|
@obj[:questions] << process_question(q, include_user: false)
end
end
def collect_answers
@obj[:answers] = []
@user.answers.each do |a|
@obj[:answers] << process_answer(a, include_user: false)
end
end
def collect_comments
@obj[:comments] = []
@user.comments.each do |c|
@obj[:comments] << process_comment(c, include_user: false, include_answer: true)
end
end
def collect_smiles
@obj[:smiles] = []
@user.smiles.each do |s|
@obj[:smiles] << process_smile(s)
end
end
def finalize
`mkdir -p "#{Rails.root.join "public", "export"}"`
`mkdir -p #{@export_dirname}/pictures`
if @user.profile_picture_file_name
%i[large medium small original].each do |s|
url = @user.profile_picture.url(s)
target_file = "#{@export_dirname}/pictures/picture_#{s}_#{@user.profile_picture_file_name}"
File.open target_file, "wb" do |f|
f.binmode
data = if url.start_with?("/")
File.read(Rails.root.join("public", url.sub(%r{\A/+}, "")))
else
HTTParty.get(url).parsed_response
end
f.write data
end
end
end
if @user.profile_header_file_name
%i[web mobile retina original].each do |s|
url = @user.profile_header.url(s)
target_file = "#{@export_dirname}/pictures/header_#{s}_#{@user.profile_header_file_name}"
File.open target_file, "wb" do |f|
f.binmode
data = if url.start_with?("/")
File.read(Rails.root.join("public", url.sub(%r{\A/+}, "")))
else
HTTParty.get(url).parsed_response
end
f.write data
end
end
end
File.open "#{@export_dirname}/#{@export_filename}.json", "w" do |f|
f.puts @obj.to_json
end
File.open "#{@export_dirname}/#{@export_filename}.yml", "w" do |f|
f.puts @obj.to_yaml
end
File.open "#{@export_dirname}/#{@export_filename}.xml", "w" do |f|
f.puts @obj.to_xml
end end
end end
def publish def publish
`tar czvf #{Rails.public_path.join "export", "#{@export_filename}.tar.gz"} #{@export_dirname}` url = "#{APP_CONFIG['https'] ? 'https' : 'http'}://#{APP_CONFIG['hostname']}/export/#{@export_name}.zip"
url = "#{APP_CONFIG['https'] ? 'https' : 'http'}://#{APP_CONFIG['hostname']}/export/#{@export_filename}.tar.gz"
@user.export_processing = false @user.export_processing = false
@user.export_url = url @user.export_url = url
@user.export_created_at = Time.now.utc @user.export_created_at = Time.now.utc
@user.save validate: false @user.save validate: false
url url
end end
def process_question(question, options = {})
opts = {
include_user: true,
include_answers: true
}.merge(options)
qobj = {}
%i[answer_count author_is_anonymous content created_at id].each do |f|
qobj[f] = question.send f
end
if opts[:include_answers]
qobj[:answers] = []
question.answers.each do |a|
next if a.nil?
qobj[:answers] << process_answer(a, include_question: false)
end
end
if opts[:include_user]
qobj[:user] = question.author_is_anonymous ? nil : user_stub(question.user)
end
qobj
end
def process_answer(answer, options = {})
opts = {
include_question: true,
include_user: true,
include_comments: true
}.merge(options)
aobj = {}
%i[comment_count content created_at id smile_count].each do |f|
aobj[f] = answer.send f
end
if opts[:include_user] && answer.user
aobj[:user] = user_stub(answer.user)
end
if opts[:include_question] && answer.question
aobj[:question] = process_question(answer.question, include_user: true, include_answers: false)
end
if opts[:include_comments]
aobj[:comments] = []
answer.comments.each do |c|
next if c.nil?
aobj[:comments] << process_comment(c, include_user: true, include_answer: false)
end
end
aobj
end
def process_comment(comment, options = {})
opts = {
include_user: true,
include_answer: false
}.merge(options)
cobj = {}
%i[content created_at id].each do |f|
cobj[f] = comment.send f
end
if opts[:include_user]
cobj[:user] = user_stub(comment.user)
end
if opts[:include_answer] && comment.answer
cobj[:answer] = process_answer(comment.answer, include_comments: false)
end
cobj
end
def process_smile(smile)
return unless smile.parent
sobj = {}
%i[id created_at].each do |f|
sobj[f] = smile.send f
end
type = smile.parent.class.name.downcase
sobj[type.to_sym] = send(:"process_#{type}", smile.parent, include_comments: false, include_user: false)
sobj
end
def user_stub(user)
return nil if user.nil?
uobj = {}
%i[answered_count asked_count comment_smiled_count commented_count created_at
id permanently_banned? screen_name smiled_count].each do |f|
uobj[f] = user.send f
end
uobj[:profile] = {}
%i[display_name motivation_header website location description].each do |f|
uobj[:profile][f] = user.profile.send f
end
EXPORT_ROLES.each do |role|
uobj[role] = user.has_role?(role)
end
uobj
end
end end

View File

@ -0,0 +1,23 @@
# frozen_string_literal: true
require "use_case/data_export/base"
module UseCase
  module DataExport
    # Exports the records in `user.answers` as `answers.json`.
    class Answers < UseCase::DataExport::Base
      # @return [Hash{String => String}] export file name => file contents
      def files
        answers = user.answers.map { |answer| collect_answer(answer) }

        { "answers.json" => json_file!(answers: answers) }
      end

      # Maps every column of the `Answer` model to the value `answer` holds for it.
      #
      # @param answer [#[]] the record to read the column values from
      # @return [Hash{Symbol => Object}]
      def collect_answer(answer)
        column_names(::Answer).to_h { |field| [field, answer[field]] }
      end
    end
  end
end

View File

@ -0,0 +1,25 @@
# frozen_string_literal: true
require "use_case/data_export/base"
module UseCase
  module DataExport
    # Exports the records in `user.smiles` as `appendables.json`.
    class Appendables < UseCase::DataExport::Base
      # @return [Hash{String => String}] export file name => file contents
      def files
        smiles = user.smiles.map { |smile| collect_appendable(smile) }

        # kept as a splatted list so further collections can be appended to it
        { "appendables.json" => json_file!(appendables: [*smiles]) }
      end

      # Maps every column of the `Appendable` model to the value `appendable`
      # holds for it.
      #
      # @param appendable [#[]] the record to read the column values from
      # @return [Hash{Symbol => Object}]
      def collect_appendable(appendable)
        column_names(::Appendable).each_with_object({}) do |field, row|
          row[field] = appendable[field]
        end
      end
    end
  end
end

View File

@ -0,0 +1,27 @@
# frozen_string_literal: true
require "json"
require "use_case/base"
module UseCase
  module DataExport
    # Common parent of the data export use cases.  A subclass overrides
    # `files`; `call` simply hands back what `files` returns.
    class Base < UseCase::Base
      # the user that is being exported
      option :user

      # Runs the export.
      #
      # @return [Hash{String => String}] whatever `files` returns
      def call
        files
      end

      # To be overridden by subclasses.
      #
      # @return [Hash{String => String}] `{ "file_name" => "file_contents\n" }`
      # @raise [NotImplementedError] unless a subclass overrides this method
      def files
        raise NotImplementedError
      end

      # Helper returning the column names of `model` as symbols.
      #
      # @param model [#column_names]
      # @return [Array<Symbol>]
      def column_names(model)
        model.column_names.map { |name| name.to_sym }
      end

      # Helper generating the content of a JSON file from the given keywords.
      #
      # The trailing newline is added here because the exporter only uses
      # File#write, which does not append one.
      #
      # @return [String] pretty-printed JSON followed by a newline
      def json_file!(**hash)
        json = JSON.pretty_generate(hash.as_json)

        "#{json}\n"
      end
    end
  end
end

View File

@ -0,0 +1,23 @@
# frozen_string_literal: true
require "use_case/data_export/base"
module UseCase
  module DataExport
    # Exports the records in `user.comments` as `comments.json`.
    class Comments < UseCase::DataExport::Base
      # @return [Hash{String => String}] export file name => file contents
      def files
        comments = user.comments.map { |comment| collect_comment(comment) }

        { "comments.json" => json_file!(comments: comments) }
      end

      # Maps every column of the `Comment` model to the value `comment` holds
      # for it.
      #
      # @param comment [#[]] the record to read the column values from
      # @return [Hash{Symbol => Object}]
      def collect_comment(comment)
        column_names(::Comment).to_h { |field| [field, comment[field]] }
      end
    end
  end
end

View File

@ -0,0 +1,23 @@
# frozen_string_literal: true
require "use_case/data_export/base"
module UseCase
  module DataExport
    # Exports the records in `user.inboxes` as `inbox_entries.json`.
    class InboxEntries < UseCase::DataExport::Base
      # @return [Hash{String => String}] export file name => file contents
      def files
        entries = user.inboxes.map { |entry| collect_inbox_entry(entry) }

        { "inbox_entries.json" => json_file!(inbox_entries: entries) }
      end

      # Maps every column of the `Inbox` model to the value `inbox_entry`
      # holds for it.
      #
      # @param inbox_entry [#[]] the record to read the column values from
      # @return [Hash{Symbol => Object}]
      def collect_inbox_entry(inbox_entry)
        column_names(::Inbox).each_with_object({}) do |field, row|
          row[field] = inbox_entry[field]
        end
      end
    end
  end
end

View File

@ -0,0 +1,23 @@
# frozen_string_literal: true
require "use_case/data_export/base"
module UseCase
  module DataExport
    # Exports the records in `user.mute_rules` as `mute_rules.json`.
    class MuteRules < UseCase::DataExport::Base
      # @return [Hash{String => String}] export file name => file contents
      def files
        rules = user.mute_rules.map { |rule| collect_mute_rule(rule) }

        { "mute_rules.json" => json_file!(mute_rules: rules) }
      end

      # Maps every column of the `MuteRule` model to the value `mute_rule`
      # holds for it.
      #
      # @param mute_rule [#[]] the record to read the column values from
      # @return [Hash{Symbol => Object}]
      def collect_mute_rule(mute_rule)
        column_names(::MuteRule).to_h { |field| [field, mute_rule[field]] }
      end
    end
  end
end

View File

@ -0,0 +1,27 @@
# frozen_string_literal: true
require "use_case/data_export/base"
module UseCase
  module DataExport
    # Exports the records in `user.questions` as `questions.json`, leaving out
    # the columns listed in `IGNORED_FIELDS`.
    class Questions < UseCase::DataExport::Base
      # `Question` columns that are never written to the export
      IGNORED_FIELDS = %i[
        author_identifier
      ].freeze

      # @return [Hash{String => String}] export file name => file contents
      def files
        questions = user.questions.map { |question| collect_question(question) }

        { "questions.json" => json_file!(questions: questions) }
      end

      # Maps every non-ignored column of the `Question` model to the value
      # `question` holds for it.
      #
      # @param question [#[]] the record to read the column values from
      # @return [Hash{Symbol => Object}]
      def collect_question(question)
        exported_fields = column_names(::Question) - IGNORED_FIELDS

        exported_fields.to_h { |field| [field, question[field]] }
      end
    end
  end
end

View File

@ -0,0 +1,29 @@
# frozen_string_literal: true
require "use_case/data_export/base"
module UseCase
  module DataExport
    # Exports the user's active follow and active block relationships as
    # `relationships.json`.
    class Relationships < UseCase::DataExport::Base
      # @return [Hash{String => String}] export file name => file contents
      def files
        # don't want to add the passive (block) relationships here as it
        # would reveal who e.g. blocked the exported user, which is
        # considered A Bad Idea™
        follows = user.active_follow_relationships.map { |rel| collect_relationship(rel) }
        blocks = user.active_block_relationships.map { |rel| collect_relationship(rel) }

        { "relationships.json" => json_file!(relationships: follows + blocks) }
      end

      # Maps every column of the `Relationship` model to the value
      # `relationship` holds for it.
      #
      # @param relationship [#[]] the record to read the column values from
      # @return [Hash{Symbol => Object}]
      def collect_relationship(relationship)
        column_names(::Relationship).each_with_object({}) do |field, row|
          row[field] = relationship[field]
        end
      end
    end
  end
end

View File

@ -0,0 +1,27 @@
# frozen_string_literal: true
require "use_case/data_export/base"
module UseCase
  module DataExport
    # Exports `user.theme` as `theme.json`; produces no file at all when the
    # user has no theme.
    class Theme < UseCase::DataExport::Base
      # @return [Hash{String => String}] export file name => file contents,
      #   or an empty hash when `user.theme` is falsy
      def files
        user.theme ? { "theme.json" => json_file!(theme: theme_data) } : {}
      end

      # Maps every column of the `Theme` model to the value `user.theme` holds
      # for it.
      #
      # @return [Hash{Symbol => Object}]
      def theme_data
        column_names(::Theme).to_h { |field| [field, user.theme[field]] }
      end
    end
  end
end

View File

@ -0,0 +1,79 @@
# frozen_string_literal: true
require "httparty"
require "use_case/data_export/base"
module UseCase
  module DataExport
    # Exports the user record, its profile and its roles as `user.json`, plus
    # every version of the profile picture and profile header below `pictures/`.
    class User < UseCase::DataExport::Base
      # roles whose presence is reported in the `roles` section of `user.json`
      EXPORT_ROLES = %i[administrator moderator].freeze

      # `User` columns that are never written to the export
      IGNORED_FIELDS_USERS = %i[
        confirmation_token
        encrypted_password
        otp_secret_key
        reset_password_sent_at
        reset_password_token
      ].freeze

      # `Profile` columns that are never written to the export
      IGNORED_FIELDS_PROFILES = %i[
        id
        user_id
      ].freeze

      # @return [Hash{String => String}] export file name => file contents
      def files
        {
          "user.json" => json_file!(
            user:    user_data,
            profile: profile_data,
            roles:   roles_data
          ),
          **pictures
        }
      end

      # @return [Hash{Symbol => Object}] every non-ignored `User` column and
      #   the value `user` holds for it
      def user_data
        exported_fields = column_names(::User) - IGNORED_FIELDS_USERS

        exported_fields.to_h { |field| [field, user[field]] }
      end

      # @return [Hash{Symbol => Object}] every non-ignored `Profile` column and
      #   the value `user.profile` holds for it
      def profile_data
        exported_fields = column_names(::Profile) - IGNORED_FIELDS_PROFILES

        exported_fields.to_h { |field| [field, user.profile[field]] }
      end

      # @return [Hash{Symbol => Object}] each role in `EXPORT_ROLES` mapped to
      #   the result of `user.has_role?` for it
      def roles_data
        EXPORT_ROLES.to_h { |role| [role, user.has_role?(role)] }
      end

      # Collects the contents of all picture versions.
      #
      # Entries whose contents could not be read are nil and get removed by
      # the final `compact`, so they simply do not show up in the export.
      #
      # @return [Hash{String => String}] export file name => raw file contents
      def pictures
        {}.tap do |hash|
          add_picture(user.profile_picture, to: hash)
          add_picture(user.profile_header, to: hash)
        end.compact
      end

      # Adds one entry per version of `picture` to the hash `to`.
      #
      # @param picture [#blank?, #versions] the uploader to export; skipped
      #   entirely when blank
      # @param to [Hash] the hash the entries are written into
      def add_picture(picture, to:)
        return if picture.blank?

        picture.versions.each do |version, file|
          export_filename = "pictures/#{file.mounted_as}_#{version}_#{file.filename}"
          to[export_filename] = picture_contents(file)
        end
      end

      private

      # Fetches the raw bytes of a single picture version.
      #
      # URLs starting with "/" are resolved below `Rails.public_path` and read
      # in binary mode; anything else is fetched over HTTP.
      #
      # @return [String, nil] nil when the local file cannot be read
      def picture_contents(file)
        url = file.url

        # NOTE(review): the HTTP status is not checked here, so an error page
        # returned by the remote host would end up in the export -- confirm
        # whether that needs handling.
        return HTTParty.get(url).parsed_response unless url.start_with?("/")

        begin
          Rails.public_path.join(url.sub(%r{\A/+}, "")).binread
        rescue SystemCallError
          # missing or unreadable file (Errno::ENOENT, Errno::EACCES, ...):
          # leave it out of the export instead of writing placeholder text
          # into the archive as if it were image data
          nil
        end
      end
    end
  end
end