diff --git a/.rubocop.yml b/.rubocop.yml index be73721b..6091bca9 100644 --- a/.rubocop.yml +++ b/.rubocop.yml @@ -128,3 +128,6 @@ Style/RescueStandardError: Style/Encoding: Enabled: false + +Style/EndlessMethod: + EnforcedStyle: allow_always diff --git a/Gemfile b/Gemfile index c20b4ef2..e8cd20ab 100644 --- a/Gemfile +++ b/Gemfile @@ -111,3 +111,5 @@ gem "net-imap" gem "net-pop" gem "pundit", "~> 2.2" + +gem "rubyzip", "~> 2.3" diff --git a/Gemfile.lock b/Gemfile.lock index 7c2e6ae3..be5d01e4 100644 --- a/Gemfile.lock +++ b/Gemfile.lock @@ -413,6 +413,7 @@ GEM ruby-progressbar (1.11.0) ruby-vips (2.1.4) ffi (~> 1.12) + rubyzip (2.3.2) sanitize (6.0.0) crass (~> 1.0.2) nokogiri (>= 1.12.0) @@ -570,6 +571,7 @@ DEPENDENCIES rubocop (~> 1.39) rubocop-rails (~> 2.17) ruby-progressbar + rubyzip (~> 2.3) sanitize sassc-rails sentry-rails diff --git a/config/locales/views.en.yml b/config/locales/views.en.yml index 9f3b527b..cfe1c84d 100644 --- a/config/locales/views.en.yml +++ b/config/locales/views.en.yml @@ -393,7 +393,7 @@ en: title: "Export" heading: "Export your data" body_html: | -

The data is inside a .tar.gz archive and available in three formats: YAML, JSON, and XML. +

The data is inside a .zip archive that contains some JSON files. The archive also contains a copy of your profile picture and header picture in all sizes.

Please note that you can only export your data once a week. Exporting your data will take a while, so please be patient. You will receive a question once exporting diff --git a/lib/exporter.rb b/lib/exporter.rb index e08abd78..72601b65 100644 --- a/lib/exporter.rb +++ b/lib/exporter.rb @@ -1,260 +1,78 @@ # frozen_string_literal: true -require "json" -require "yaml" -require "httparty" +require "fileutils" +require "securerandom" +require "zip/filesystem" +require "use_case/data_export/answers" +require "use_case/data_export/appendables" +require "use_case/data_export/comments" +require "use_case/data_export/inbox_entries" +require "use_case/data_export/mute_rules" +require "use_case/data_export/questions" +require "use_case/data_export/relationships" +require "use_case/data_export/theme" +require "use_case/data_export/user" + +# the justask data exporter, now with 200% less shelling out to system tools! +# +# the data export can be easily extended by subclassing `UseCase::DataExport::Base` +# and `require`ing it above class Exporter - EXPORT_ROLES = %i[administrator moderator].freeze - def initialize(user) @user = user - @obj = {} - @export_dirname = Dir.mktmpdir("rs-export-") - @export_filename = File.basename(@export_dirname) + + @export_name = "export-#{@user.id}-#{SecureRandom.base36(32)}" + FileUtils.mkdir_p(Rails.public_path.join("export")) # ensure the public export path exists + export_zipfile_path = Rails.public_path.join("export", "#{@export_name}.zip") + @zipfile = Zip::File.open(export_zipfile_path, Zip::File::CREATE) end def export @user.export_processing = true @user.save validate: false - collect_user_info - collect_questions - collect_answers - collect_comments - collect_smiles - finalize + + prepare_zipfile + write_files publish rescue => e Sentry.capture_exception(e) @user.export_processing = false @user.save validate: false + raise # so that e.g. the sidekiq job fails ensure - FileUtils.remove_dir(@export_dirname) + @zipfile.close end private - def collect_user_info - %i[answered_count asked_count comment_smiled_count commented_count - confirmation_sent_at confirmed_at created_at profile_header profile_header_h profile_header_w profile_header_x profile_header_y - profile_picture_w profile_picture_h profile_picture_x profile_picture_y current_sign_in_at current_sign_in_ip - id last_sign_in_at last_sign_in_ip locale - privacy_allow_anonymous_questions privacy_allow_public_timeline privacy_allow_stranger_answers - privacy_show_in_search profile_header_file_name profile_picture_file_name - screen_name show_foreign_themes sign_in_count smiled_count updated_at].each do |f| - @obj[f] = @user.send f - end + # creates some directories we want to exist and sets a nice comment + def prepare_zipfile + @zipfile.mkdir(@export_name) + @zipfile.mkdir("#{@export_name}/pictures") - @obj[:profile] = {} - %i[display_name motivation_header website location description].each do |f| - @obj[:profile][f] = @user.profile.send f - end - - EXPORT_ROLES.each do |role| - @obj[role] = @user.has_role?(role) - end + @zipfile.comment = <<~COMMENT + #{APP_CONFIG.fetch(:site_name)} export done for #{@user.screen_name} on #{Time.now.utc.iso8601} + COMMENT end - def collect_questions - @obj[:questions] = [] - @user.questions.each do |q| - @obj[:questions] << process_question(q, include_user: false) - end - end - - def collect_answers - @obj[:answers] = [] - @user.answers.each do |a| - @obj[:answers] << process_answer(a, include_user: false) - end - end - - def collect_comments - @obj[:comments] = [] - @user.comments.each do |c| - @obj[:comments] << process_comment(c, include_user: false, include_answer: true) - end - end - - def collect_smiles - @obj[:smiles] = [] - @user.smiles.each do |s| - @obj[:smiles] << process_smile(s) - end - end - - def finalize - `mkdir -p "#{Rails.root.join "public", "export"}"` - `mkdir -p #{@export_dirname}/pictures` - - if @user.profile_picture_file_name - %i[large medium small original].each do |s| - url = @user.profile_picture.url(s) - target_file = "#{@export_dirname}/pictures/picture_#{s}_#{@user.profile_picture_file_name}" - File.open target_file, "wb" do |f| - f.binmode - data = if url.start_with?("/") - File.read(Rails.root.join("public", url.sub(%r{\A/+}, ""))) - else - HTTParty.get(url).parsed_response - end - f.write data + # writes the files to the zip file + def write_files + UseCase::DataExport::Base.descendants.each do |export_klass| + export_klass.call(user: @user).each do |file_name, contents| + @zipfile.file.open("#{@export_name}/#{file_name}", "wb".dup) do |file| # .dup because of %(can't modify frozen String: "wb") + file.write contents end end end - - if @user.profile_header_file_name - %i[web mobile retina original].each do |s| - url = @user.profile_header.url(s) - target_file = "#{@export_dirname}/pictures/header_#{s}_#{@user.profile_header_file_name}" - File.open target_file, "wb" do |f| - f.binmode - data = if url.start_with?("/") - File.read(Rails.root.join("public", url.sub(%r{\A/+}, ""))) - else - HTTParty.get(url).parsed_response - end - f.write data - end - end - end - - File.open "#{@export_dirname}/#{@export_filename}.json", "w" do |f| - f.puts @obj.to_json - end - - File.open "#{@export_dirname}/#{@export_filename}.yml", "w" do |f| - f.puts @obj.to_yaml - end - - File.open "#{@export_dirname}/#{@export_filename}.xml", "w" do |f| - f.puts @obj.to_xml - end end def publish - `tar czvf #{Rails.public_path.join "export", "#{@export_filename}.tar.gz"} #{@export_dirname}` - url = "#{APP_CONFIG['https'] ? 'https' : 'http'}://#{APP_CONFIG['hostname']}/export/#{@export_filename}.tar.gz" + url = "#{APP_CONFIG['https'] ? 'https' : 'http'}://#{APP_CONFIG['hostname']}/export/#{@export_name}.zip" @user.export_processing = false @user.export_url = url @user.export_created_at = Time.now.utc @user.save validate: false url end - - def process_question(question, options = {}) - opts = { - include_user: true, - include_answers: true - }.merge(options) - - qobj = {} - %i[answer_count author_is_anonymous content created_at id].each do |f| - qobj[f] = question.send f - end - - if opts[:include_answers] - qobj[:answers] = [] - question.answers.each do |a| - next if a.nil? - - qobj[:answers] << process_answer(a, include_question: false) - end - end - - if opts[:include_user] - qobj[:user] = question.author_is_anonymous ? nil : user_stub(question.user) - end - - qobj - end - - def process_answer(answer, options = {}) - opts = { - include_question: true, - include_user: true, - include_comments: true - }.merge(options) - - aobj = {} - %i[comment_count content created_at id smile_count].each do |f| - aobj[f] = answer.send f - end - - if opts[:include_user] && answer.user - aobj[:user] = user_stub(answer.user) - end - - if opts[:include_question] && answer.question - aobj[:question] = process_question(answer.question, include_user: true, include_answers: false) - end - - if opts[:include_comments] - aobj[:comments] = [] - answer.comments.each do |c| - next if c.nil? - - aobj[:comments] << process_comment(c, include_user: true, include_answer: false) - end - end - - aobj - end - - def process_comment(comment, options = {}) - opts = { - include_user: true, - include_answer: false - }.merge(options) - - cobj = {} - %i[content created_at id].each do |f| - cobj[f] = comment.send f - end - - if opts[:include_user] - cobj[:user] = user_stub(comment.user) - end - - if opts[:include_answer] && comment.answer - cobj[:answer] = process_answer(comment.answer, include_comments: false) - end - - cobj - end - - def process_smile(smile) - return unless smile.parent - - sobj = {} - - %i[id created_at].each do |f| - sobj[f] = smile.send f - end - - type = smile.parent.class.name.downcase - sobj[type.to_sym] = send(:"process_#{type}", smile.parent, include_comments: false, include_user: false) - - sobj - end - - def user_stub(user) - return nil if user.nil? - uobj = {} - %i[answered_count asked_count comment_smiled_count commented_count created_at - id permanently_banned? screen_name smiled_count].each do |f| - uobj[f] = user.send f - end - - uobj[:profile] = {} - %i[display_name motivation_header website location description].each do |f| - uobj[:profile][f] = user.profile.send f - end - - EXPORT_ROLES.each do |role| - uobj[role] = user.has_role?(role) - end - - uobj - end end diff --git a/lib/use_case/data_export/answers.rb b/lib/use_case/data_export/answers.rb new file mode 100644 index 00000000..1443ec6d --- /dev/null +++ b/lib/use_case/data_export/answers.rb @@ -0,0 +1,23 @@ +# frozen_string_literal: true + +require "use_case/data_export/base" + +module UseCase + module DataExport + class Answers < UseCase::DataExport::Base + def files = { + "answers.json" => json_file!( + answers: user.answers.map(&method(:collect_answer)) + ) + } + + def collect_answer(answer) + {}.tap do |h| + column_names(::Answer).each do |field| + h[field] = answer[field] + end + end + end + end + end +end diff --git a/lib/use_case/data_export/appendables.rb b/lib/use_case/data_export/appendables.rb new file mode 100644 index 00000000..0525431b --- /dev/null +++ b/lib/use_case/data_export/appendables.rb @@ -0,0 +1,25 @@ +# frozen_string_literal: true + +require "use_case/data_export/base" + +module UseCase + module DataExport + class Appendables < UseCase::DataExport::Base + def files = { + "appendables.json" => json_file!( + appendables: [ + *user.smiles.map(&method(:collect_appendable)) + ] + ) + } + + def collect_appendable(appendable) + {}.tap do |h| + column_names(::Appendable).each do |field| + h[field] = appendable[field] + end + end + end + end + end +end diff --git a/lib/use_case/data_export/base.rb b/lib/use_case/data_export/base.rb new file mode 100644 index 00000000..dc2ded83 --- /dev/null +++ b/lib/use_case/data_export/base.rb @@ -0,0 +1,27 @@ +# frozen_string_literal: true + +require "json" + +require "use_case/base" + +module UseCase + module DataExport + class Base < UseCase::Base + # the user that is being exported + option :user + + def call = files + + # returns a hash with `{ "file_name" => "file_contents\n" }` + def files = raise NotImplementedError + + # helper method that returns the column names of `model` as symbols + def column_names(model) = model.column_names.map(&:to_sym) + + # helper method that generates the content of a json file + # + # it ensures the final newline exists, as the exporter only uses File#write + def json_file!(**hash) = "#{JSON.pretty_generate(hash.as_json)}\n" + end + end +end diff --git a/lib/use_case/data_export/comments.rb b/lib/use_case/data_export/comments.rb new file mode 100644 index 00000000..6a6e820d --- /dev/null +++ b/lib/use_case/data_export/comments.rb @@ -0,0 +1,23 @@ +# frozen_string_literal: true + +require "use_case/data_export/base" + +module UseCase + module DataExport + class Comments < UseCase::DataExport::Base + def files = { + "comments.json" => json_file!( + comments: user.comments.map(&method(:collect_comment)) + ) + } + + def collect_comment(comment) + {}.tap do |h| + column_names(::Comment).each do |field| + h[field] = comment[field] + end + end + end + end + end +end diff --git a/lib/use_case/data_export/inbox_entries.rb b/lib/use_case/data_export/inbox_entries.rb new file mode 100644 index 00000000..04b2efb4 --- /dev/null +++ b/lib/use_case/data_export/inbox_entries.rb @@ -0,0 +1,23 @@ +# frozen_string_literal: true + +require "use_case/data_export/base" + +module UseCase + module DataExport + class InboxEntries < UseCase::DataExport::Base + def files = { + "inbox_entries.json" => json_file!( + inbox_entries: user.inboxes.map(&method(:collect_inbox_entry)) + ) + } + + def collect_inbox_entry(inbox_entry) + {}.tap do |h| + column_names(::Inbox).each do |field| + h[field] = inbox_entry[field] + end + end + end + end + end +end diff --git a/lib/use_case/data_export/mute_rules.rb b/lib/use_case/data_export/mute_rules.rb new file mode 100644 index 00000000..6b8755ba --- /dev/null +++ b/lib/use_case/data_export/mute_rules.rb @@ -0,0 +1,23 @@ +# frozen_string_literal: true + +require "use_case/data_export/base" + +module UseCase + module DataExport + class MuteRules < UseCase::DataExport::Base + def files = { + "mute_rules.json" => json_file!( + mute_rules: user.mute_rules.map(&method(:collect_mute_rule)) + ) + } + + def collect_mute_rule(mute_rule) + {}.tap do |h| + column_names(::MuteRule).each do |field| + h[field] = mute_rule[field] + end + end + end + end + end +end diff --git a/lib/use_case/data_export/questions.rb b/lib/use_case/data_export/questions.rb new file mode 100644 index 00000000..7536e7b6 --- /dev/null +++ b/lib/use_case/data_export/questions.rb @@ -0,0 +1,27 @@ +# frozen_string_literal: true + +require "use_case/data_export/base" + +module UseCase + module DataExport + class Questions < UseCase::DataExport::Base + IGNORED_FIELDS = %i[ + author_identifier + ].freeze + + def files = { + "questions.json" => json_file!( + questions: user.questions.map(&method(:collect_question)) + ) + } + + def collect_question(question) + {}.tap do |h| + (column_names(::Question) - IGNORED_FIELDS).each do |field| + h[field] = question[field] + end + end + end + end + end +end diff --git a/lib/use_case/data_export/relationships.rb b/lib/use_case/data_export/relationships.rb new file mode 100644 index 00000000..57fb8cb9 --- /dev/null +++ b/lib/use_case/data_export/relationships.rb @@ -0,0 +1,29 @@ +# frozen_string_literal: true + +require "use_case/data_export/base" + +module UseCase + module DataExport + class Relationships < UseCase::DataExport::Base + def files = { + "relationships.json" => json_file!( + relationships: [ + # don't want to add the passive (block) relationships here as it + # would reveal who e.g. blocked the exported user, which is + # considered A Bad Idea™ + *user.active_follow_relationships.map(&method(:collect_relationship)), + *user.active_block_relationships.map(&method(:collect_relationship)) + ] + ) + } + + def collect_relationship(relationship) + {}.tap do |h| + column_names(::Relationship).each do |field| + h[field] = relationship[field] + end + end + end + end + end +end diff --git a/lib/use_case/data_export/theme.rb b/lib/use_case/data_export/theme.rb new file mode 100644 index 00000000..135fc89e --- /dev/null +++ b/lib/use_case/data_export/theme.rb @@ -0,0 +1,27 @@ +# frozen_string_literal: true + +require "use_case/data_export/base" + +module UseCase + module DataExport + class Theme < UseCase::DataExport::Base + def files + return {} unless user.theme + + { + "theme.json" => json_file!( + theme: theme_data + ) + } + end + + def theme_data + {}.tap do |obj| + column_names(::Theme).each do |field| + obj[field] = user.theme[field] + end + end + end + end + end +end diff --git a/lib/use_case/data_export/user.rb b/lib/use_case/data_export/user.rb new file mode 100644 index 00000000..f626bc10 --- /dev/null +++ b/lib/use_case/data_export/user.rb @@ -0,0 +1,79 @@ +# frozen_string_literal: true + +require "httparty" + +require "use_case/data_export/base" + +module UseCase + module DataExport + class User < UseCase::DataExport::Base + EXPORT_ROLES = %i[administrator moderator].freeze + + IGNORED_FIELDS_USERS = %i[ + confirmation_token + encrypted_password + otp_secret_key + reset_password_sent_at + reset_password_token + ].freeze + + IGNORED_FIELDS_PROFILES = %i[ + id + user_id + ].freeze + + def files = { + "user.json" => json_file!( + user: user_data, + profile: profile_data, + roles: roles_data + ), + **pictures + } + + def user_data + {}.tap do |obj| + (column_names(::User) - IGNORED_FIELDS_USERS).each do |field| + obj[field] = user[field] + end + end + end + + def profile_data + {}.tap do |profile| + (column_names(::Profile) - IGNORED_FIELDS_PROFILES).each do |field| + profile[field] = user.profile[field] + end + end + end + + def roles_data + {}.tap do |obj| + EXPORT_ROLES.each do |role| + obj[role] = user.has_role?(role) + end + end + end + + def pictures + {}.tap do |hash| + add_picture(user.profile_picture, to: hash) + add_picture(user.profile_header, to: hash) + end.compact + end + + def add_picture(picture, to:) + return if picture.blank? + + picture.versions.each do |version, file| + export_filename = "pictures/#{file.mounted_as}_#{version}_#{file.filename}" + to[export_filename] = if file.url.start_with?("/") + Rails.public_path.join(file.url.sub(%r{\A/+}, "")).read rescue "ceci n'est pas un image" # TODO: fix this + else + HTTParty.get(file.url).parsed_response + end + end + end + end + end +end