#   Copyright (c) 2010, Diaspora Inc.  This file is
#   licensed under the Affero General Public License version 3 or later.  See
#   the COPYRIGHT file.

require 'json'
require 'csv'
require 'fileutils'
require 'shellwords'

module DataConversion
  # Shared base for the data-conversion helpers: remembers when the run
  # started and provides elapsed-time logging.
  class Base
    attr_accessor :start_time

    # start_time - Time used as the zero point for the elapsed seconds that
    #              #log prints (defaults to the moment of construction).
    def initialize(start_time = Time.now)
      @start_time = start_time
    end

    # Prints +message+ to stdout, prefixed with the seconds elapsed since
    # start_time, when Rails.env is development or production. The message is
    # also sent to Rails.logger at debug level when a logger is present.
    def log(message)
      if ['development', 'production'].include?(Rails.env)
        puts "#{format('%.2f', Time.now - start_time)}s #{message}"
      end
      Rails.logger.debug(message) if Rails.logger
    end
  end

  # Dumps the Mongo collections listed in #models to JSON with mongoexport
  # and converts each dump into one or two CSV files under #dirpath.
  class ExportFromMongo < DataConversion::Base
    # Hash of CSV option values. Nothing in this class passes it to CSV.
    def csv_options
      {:col_sep => ",",
       :row_sep => :auto,
       :quote_char => '"',
       :field_size_limit => nil,
       :converters => nil,
       :unconverted_fields => nil,
       :headers => false,
       :return_headers => false,
       :header_converters => nil,
       :skip_blanks => false,
       :force_quotes => false}
    end

    # Name of the export directory, relative to Rails.root.
    def dirname
      "export-for-mysql"
    end

    # Absolute path of the export directory.
    def dirpath
      "#{Rails.root}/#{dirname}"
    end

    # Removes any previous export directory and recreates its empty json/ and
    # csv/ subdirectories. FileUtils is used rather than shelling out so that
    # paths containing spaces or shell metacharacters are handled correctly.
    def clear_dir
      FileUtils.rm_rf(dirpath)
      FileUtils.mkdir_p(File.join(dirpath, 'json'))
      FileUtils.mkdir_p(File.join(dirpath, 'csv'))
    end

    # Name of the Mongo database to export, derived from Rails.env.
    def db_name
      "diaspora-#{Rails.env}"
    end

    # Memoized list of per-collection hashes. #write_json_export adds
    # :json_file to each entry and the converters add their column lists.
    def models
      @models ||= [{:name => :aspects},
                   {:name => :comments},
                   {:name => :contacts},
                   {:name => :invitations},
                   {:name => :notifications},
                   {:name => :people},
                   {:name => :posts},
                   {:name => :requests},
                   {:name => :users},
      ]
    end

    # sed command replacing { "$oid" : "<id>" } wrappers with the quoted id.
    def id_sed
      @id_sed ||= sed_replace('{\ \"$oid\"\ :\ \(\"[^"]*\"\)\ }')
    end

    # sed command replacing { "$date" : <digits> } wrappers with the digits.
    def date_sed
      @date_sed ||= sed_replace('{\ \"$date\"\ :\ \([0-9]*\)\ }')
    end

    # Builds a global sed substitution that replaces every match of +regex+
    # with its first capture group.
    def sed_replace(regex)
      "sed 's/#{regex}/\\1/g'"
    end

    # Shell pipeline that exports +model_name+ and flattens ids and dates.
    def json_for_model(model_name)
      "mongoexport -d #{db_name} -c #{model_name} | #{id_sed} | #{date_sed}"
    end

    # Runs the export pipeline for every model, writing
    # <dirpath>/json/<name>.json and recording that path as model[:json_file].
    def write_json_export
      log "Starting JSON export..."
      models.each do |model|
        log "Starting #{model[:name]} JSON export..."
        filename = "#{dirpath}/json/#{model[:name]}.json"
        model[:json_file] = filename
        # Escape the redirect target so unusual characters in Rails.root
        # cannot break (or alter) the shell command.
        `#{json_for_model(model[:name])} > #{Shellwords.escape(filename)}`
        log "Completed #{model[:name]} JSON export to #{dirname}/json/#{model[:name]}.json."
      end
      log "JSON export complete."
    end

    # Dispatches every model to its <name>_json_to_csv converter.
    def convert_json_files
      models.each do |model|
        self.send("#{model[:name]}_json_to_csv".to_sym, model)
      end
    end

    # Writes <dirpath>/csv/<name>.csv: a header row from model_hash[:attrs],
    # then one row per JSON document, produced by yielding the parsed
    # document to the caller's block. Both files are closed even when
    # parsing or the block raises.
    def generic_json_to_csv(model_hash)
      log "Converting #{model_hash[:name]} json to csv"
      File.open(model_hash[:json_file]) do |json_file|
        CSV.open(csv_path(model_hash[:name]), 'w') do |csv|
          csv << model_hash[:attrs]
          each_document(json_file) { |hash| csv << yield(hash) }
        end
      end
    end

    def comments_json_to_csv(model_hash)
      model_hash[:attrs] = ["mongo_id", "post_mongo_id", "person_mongo_id", "diaspora_handle", "text", "youtube_titles"]
      generic_json_to_csv(model_hash) do |hash|
        mongo_attrs = ["_id", "post_id", "person_id", "diaspora_handle", "text", "youtube_titles"]
        mongo_attrs.map { |attr_name| hash[attr_name] }
      end
    end

    # Writes contacts.csv and, from each contact's aspect_ids, the
    # aspect_memberships.csv join table.
    def contacts_json_to_csv(model_hash)
      model_hash[:main_attrs] = ["mongo_id", "user_mongo_id", "person_mongo_id", "pending", "created_at", "updated_at"]
      # Aspect memberships
      model_hash[:join_table_name] = :aspect_memberships
      model_hash[:join_table_attrs] = ["contact_mongo_id", "aspect_mongo_id"]

      generic_json_to_two_csvs(model_hash) do |hash|
        main_mongo_attrs = ["_id", "user_id", "person_id", "pending", "created_at", "updated_at"]
        main_row = main_mongo_attrs.map { |attr_name| hash[attr_name] }
        # Array() tolerates documents that carry no aspect_ids key.
        aspect_membership_rows = Array(hash["aspect_ids"]).map { |id| [hash["_id"], id] }
        [main_row, aspect_membership_rows]
      end
    end

    def invitations_json_to_csv(model_hash)
      model_hash[:attrs] = ["mongo_id", "recipient_mongo_id", "sender_mongo_id", "aspect_mongo_id", "message"]
      generic_json_to_csv(model_hash) do |hash|
        mongo_attrs = ["_id", "to_id", "from_id", "into_id", "message"]
        mongo_attrs.map { |attr_name| hash[attr_name] }
      end
    end

    def notifications_json_to_csv(model_hash)
      model_hash[:attrs] = ["mongo_id", "target_id", "target_type", "unread"]
      generic_json_to_csv(model_hash) do |hash|
        mongo_attrs = ["_id", "target_id", "kind", "unread"]
        mongo_attrs.map { |attr_name| hash[attr_name] }
      end
    end

    # Writes people.csv and, from each person's embedded "profile" document,
    # profiles.csv.
    def people_json_to_csv(model_hash)
      model_hash[:attrs] = ["created_at", "updated_at", "serialized_public_key", "url", "mongo_id", "owner_mongo_id", "diaspora_handle"]
      model_hash[:profile_attrs] = ["image_url_medium", "searchable", "image_url", "person_mongo_id", "gender", "diaspora_handle", "birthday", "last_name", "bio", "image_url_small", "first_name"]

      log "Converting #{model_hash[:name]} json to csv"
      File.open(model_hash[:json_file]) do |json_file|
        CSV.open(csv_path(model_hash[:name]), 'w') do |people_csv|
          people_csv << model_hash[:attrs]
          CSV.open(csv_path(:profiles), 'w') do |profiles_csv|
            profiles_csv << model_hash[:profile_attrs]

            each_document(json_file) do |hash|
              person_row = model_hash[:attrs].map { |attr_name| hash[mongo_key(attr_name)] }
              people_csv << person_row

              # A person without an embedded profile yields an empty profile
              # row instead of aborting the whole export.
              profile = hash["profile"] || {}
              profile_row = model_hash[:profile_attrs].map do |attr_name|
                if attr_name == "person_mongo_id"
                  # The profile is embedded in the person document, so the
                  # enclosing document's id is its owner when the profile
                  # itself stores no person_id.
                  profile["person_id"] || hash["_id"]
                else
                  profile[attr_name]
                end
              end
              # The profile row, not the person row, belongs in profiles.csv.
              profiles_csv << profile_row
            end
          end
        end
      end
    end

    def posts_json_to_csv(model_hash)
      model_hash[:attrs] = ["youtube_titles", "pending", "created_at", "public", "updated_at", "status_message_mongo_id", "caption", "remote_photo_path", "random_string", "image", "mongo_id", "type", "diaspora_handle", "person_mongo_id", "message"]
      generic_json_to_csv(model_hash) do |hash|
        mongo_attrs = ["youtube_titles", "pending", "created_at", "public", "updated_at", "status_message_id", "caption", "remote_photo_path", "random_string", "image", "_id", "_type", "diaspora_handle", "person_id", "message"]
        mongo_attrs.map { |attr_name| hash[attr_name] }
      end
      #has to handle the polymorphic stuff
    end

    def requests_json_to_csv(model_hash)
      model_hash[:attrs] = ["mongo_id", "recipient_mongo_id", "sender_mongo_id", "aspect_mongo_id"]
      generic_json_to_csv(model_hash) do |hash|
        mongo_attrs = ["_id", "to_id", "from_id", "into_id"]
        mongo_attrs.map { |attr_name| hash[attr_name] }
      end
    end

    def users_json_to_csv(model_hash)
      model_hash[:attrs] = ["mongo_id", "username", "serialized_private_key", "encrypted_password", "invites", "invitation_token", "invitation_sent_at", "getting_started", "disable_mail", "language", "last_sign_in_ip", "last_sign_in_at", "reset_password_token", "password_salt"]
      generic_json_to_csv(model_hash) do |hash|
        mongo_attrs = ["_id", "username", "serialized_private_key", "encrypted_password", "invites", "invitation_token", "invitation_sent_at", "getting_started", "disable_mail", "language", "last_sign_in_ip", "last_sign_in_at", "reset_password_token", "password_salt"]
        mongo_attrs.map { |attr_name| hash[attr_name] }
      end
    end

    # Writes aspects.csv and, from each aspect's post_ids, the
    # post_visibilities.csv join table.
    def aspects_json_to_csv(model_hash)
      log "Converting aspects json to aspects and post_visibilities csvs"
      model_hash[:main_attrs] = ["mongo_id", "name", "created_at", "updated_at"]
      # Post visibilities
      model_hash[:join_table_name] = :post_visibilities
      model_hash[:join_table_attrs] = ["aspect_mongo_id", "post_mongo_id"]

      generic_json_to_two_csvs(model_hash) do |hash|
        mongo_attrs = ["_id", "name", "created_at", "updated_at"]
        main_row = mongo_attrs.map { |attr_name| hash[attr_name] }
        # Array() tolerates documents that carry no post_ids key.
        post_visibility_rows = Array(hash["post_ids"]).map { |id| [hash["_id"], id] }
        [main_row, post_visibility_rows]
      end
    end

    # Like #generic_json_to_csv, but the block returns
    # [main_row, join_rows]: main_row goes to <name>.csv and every element
    # of join_rows to <join_table_name>.csv.
    def generic_json_to_two_csvs(model_hash)
      log "Converting #{model_hash[:name]} json to two csvs"
      File.open(model_hash[:json_file]) do |json_file|
        CSV.open(csv_path(model_hash[:name]), 'w') do |main_csv|
          main_csv << model_hash[:main_attrs]
          CSV.open(csv_path(model_hash[:join_table_name]), 'w') do |join_csv|
            join_csv << model_hash[:join_table_attrs]

            each_document(json_file) do |hash|
              result = yield(hash)
              main_csv << result.first
              result.last.each { |row| join_csv << row }
            end
          end
        end
      end
    end

    private

    # Path of the CSV file written for +table_name+.
    def csv_path(table_name)
      "#{dirpath}/csv/#{table_name}.csv"
    end

    # Yields each line of +io+ parsed as a JSON document, skipping blank lines.
    def each_document(io)
      io.each_line do |line|
        next if line.strip.empty?
        yield JSON.parse(line)
      end
    end

    # Maps a people.csv column name to the key used in the exported JSON:
    # "mongo_id" is stored as "_id" (as in every other converter here);
    # otherwise the "mongo_" infix is dropped ("owner_mongo_id" -> "owner_id").
    def mongo_key(column_name)
      return "_id" if column_name == "mongo_id"
      column_name.gsub("mongo_", "")
    end
  end
end
- models.each do |model| - log "Starting #{model[:name]} JSON export..." - filename ="#{dirpath}/json/#{model[:name]}.json" - model[:json_file] = filename - `#{json_for_model(model[:name])} > #{filename}` - log "Completed #{model[:name]} JSON export to #{dirname}/json/#{model[:name]}.json." - end - log "JSON export complete." - end - - def log string - if ['development', 'production'].include?(Rails.env) - puts "#{sprintf("%.2f", Time.now - start_time)}s #{string}" - end - Rails.logger.debug(string) if Rails.logger - end - - def convert_json_files - models.each do |model| - self.send("#{model[:name]}_json_to_csv".to_sym, model) - end - end - - def generic_json_to_csv model_hash - log "Converting #{model_hash[:name]} json to csv" - json_file = File.open(model_hash[:json_file]) - - csv = CSV.open("#{dirpath}/csv/#{model_hash[:name]}.csv",'w') - csv << model_hash[:attrs] - - json_file.each do |aspect_json| - hash = JSON.parse(aspect_json) - csv << yield(hash) - end - json_file.close - csv.close - end - - def comments_json_to_csv model_hash - model_hash[:attrs] = ["mongo_id", "post_mongo_id", "person_mongo_id", "diaspora_handle", "text", "youtube_titles"] - generic_json_to_csv(model_hash) do |hash| - mongo_attrs = ["_id", "post_id", "person_id", "diaspora_handle", "text", "youtube_titles"] - mongo_attrs.map{|attr_name| hash[attr_name]} - end - end - - def contacts_json_to_csv model_hash - model_hash[:main_attrs] = ["mongo_id", "user_mongo_id", "person_mongo_id", "pending", "created_at", "updated_at"] - #Post Visibilities - model_hash[:join_table_name] = :aspect_memberships - model_hash[:join_table_attrs] = ["contact_mongo_id", "aspect_mongo_id"] - - generic_json_to_two_csvs(model_hash) do |hash| - main_mongo_attrs = ["_id", "user_id", "person_id", "pending", "created_at", "updated_at"] - main_row = main_mongo_attrs.map{|attr_name| hash[attr_name]} - aspect_membership_rows = hash["aspect_ids"].map{|id| [hash["_id"], id]} - [main_row, aspect_membership_rows] - end - 
#Also writes the aspect memberships csv - end - - def invitations_json_to_csv model_hash - model_hash[:attrs] = ["mongo_id", "recipient_mongo_id", "sender_mongo_id", "aspect_mongo_id", "message"] - generic_json_to_csv(model_hash) do |hash| - mongo_attrs = ["_id", "to_id", "from_id", "into_id", "message"] - mongo_attrs.map{|attr_name| hash[attr_name]} - end - end - - def notifications_json_to_csv model_hash - model_hash[:attrs] = ["mongo_id", "target_id", "target_type", "unread"] - generic_json_to_csv(model_hash) do |hash| - mongo_attrs = ["_id", "target_id", "kind", "unread"] - mongo_attrs.map{|attr_name| hash[attr_name]} - end - end - - def people_json_to_csv model_hash - model_hash[:attrs] = ["created_at", "updated_at", "serialized_public_key", "url", "mongo_id", "owner_mongo_id", "diaspora_handle"] - model_hash[:profile_attrs] = ["image_url_medium", "searchable", "image_url", "person_mongo_id", "gender", "diaspora_handle", "birthday", "last_name", "bio", "image_url_small", "first_name"] - #Also writes the profiles csv - - log "Converting #{model_hash[:name]} json to csv" - json_file = File.open(model_hash[:json_file]) - - people_csv = CSV.open("#{dirpath}/csv/#{model_hash[:name]}.csv",'w') - people_csv << model_hash[:attrs] - - profiles_csv = CSV.open("#{dirpath}/csv/profiles.csv",'w') - profiles_csv << model_hash[:profile_attrs] - - json_file.each do |aspect_json| - hash = JSON.parse(aspect_json) - person_row = model_hash[:attrs].map do |attr_name| - attr_name = attr_name.gsub("mongo_", "") - hash[attr_name] - end - people_csv << person_row - - profile_row = model_hash[:profile_attrs].map do |attr_name| - attr_name = attr_name.gsub("mongo_", "") - hash["profile"][attr_name] - end - profiles_csv << person_row - end - json_file.close - people_csv.close - profiles_csv.close - end - - def posts_json_to_csv model_hash - model_hash[:attrs] =["youtube_titles", "pending", "created_at", "public", "updated_at", "status_message_mongo_id", "caption", "remote_photo_path", 
"random_string", "image", "mongo_id", "type", "diaspora_handle", "person_mongo_id", "message" ] - generic_json_to_csv(model_hash) do |hash| - mongo_attrs = ["youtube_titles", "pending", "created_at", "public", "updated_at", "status_message_id", "caption", "remote_photo_path", "random_string", "image", "_id", "_type", "diaspora_handle", "person_id", "message" ] - mongo_attrs.map{|attr_name| hash[attr_name]} - end - #has to handle the polymorphic stuff - end - - def requests_json_to_csv model_hash - model_hash[:attrs] = ["mongo_id", "recipient_mongo_id", "sender_mongo_id", "aspect_mongo_id"] - generic_json_to_csv(model_hash) do |hash| - mongo_attrs = ["_id", "to_id", "from_id", "into_id"] - mongo_attrs.map{|attr_name| hash[attr_name]} - end - end - - def users_json_to_csv model_hash - model_hash[:attrs] = ["mongo_id", "username", "serialized_private_key", "encrypted_password", "invites", "invitation_token", "invitation_sent_at", "getting_started", "disable_mail", "language", "last_sign_in_ip", "last_sign_in_at", "reset_password_token", "password_salt"] - generic_json_to_csv(model_hash) do |hash| - mongo_attrs = ["_id", "username", "serialized_private_key", "encrypted_password", "invites", "invitation_token", "invitation_sent_at", "getting_started", "disable_mail", "language", "last_sign_in_ip", "last_sign_in_at", "reset_password_token", "password_salt"] - mongo_attrs.map{|attr_name| hash[attr_name]} - end - end - - def aspects_json_to_csv model_hash - log "Converting aspects json to aspects and post_visibilities csvs" - model_hash[:main_attrs] = ["mongo_id", "name", "created_at", "updated_at"] - #Post Visibilities - model_hash[:join_table_name] = :post_visibilities - model_hash[:join_table_attrs] = ["aspect_mongo_id", "post_mongo_id"] - - generic_json_to_two_csvs(model_hash) do |hash| - mongo_attrs = ["_id", "name", "created_at", "updated_at"] - main_row = mongo_attrs.map{|attr_name| hash[attr_name]} - post_visibility_rows = hash["post_ids"].map{|id| 
[hash["_id"],id]} - [main_row, post_visibility_rows] - end - end - - def generic_json_to_two_csvs model_hash - log "Converting #{model_hash[:name]} json to two csvs" - json_file = File.open(model_hash[:json_file]) - - main_csv = CSV.open("#{dirpath}/csv/#{model_hash[:name]}.csv",'w') - main_csv << model_hash[:main_attrs] - - join_csv = CSV.open("#{dirpath}/csv/#{model_hash[:join_table_name]}.csv",'w') - join_csv << model_hash[:join_table_attrs] - - json_file.each do |aspect_json| - hash = JSON.parse(aspect_json) - result = yield(hash) - main_csv << result.first - result.last.each{|row| join_csv << row} - end - json_file.close - main_csv.close - join_csv.close - end -end diff --git a/lib/tasks/migrations.rake b/lib/tasks/migrations.rake index f442fff85724d6685d1f3bc736f56108b2dcc632..506138c7e5df53c3b8e9c897f24b6867d3939169 100644 --- a/lib/tasks/migrations.rake +++ b/lib/tasks/migrations.rake @@ -1,12 +1,13 @@ -# Copyright (c) 2010, Diaspora Inc. This file is -# licensed under the Affero General Public License version 3 or later. See -# the COPYRIGHT file. +# Copyright (c) 2010, Diaspora Inc. This file is +# licensed under the Affero General Public License version 3 or later. See +# the COPYRIGHT file. 
+ +Dir.glob(File.join(Rails.root, 'lib', 'data_conversion', '*.rb')).each { |f| require f } namespace :migrations do desc 'export data for mysql import' task :export_for_mysql do - require File.join(Rails.root, 'lib', 'mongo_to_mysql') - migrator = MongoToMysql.new + migrator = DataConversion::ExportFromMongo.new migrator.log("**** Starting export for MySQL ****") migrator.clear_dir migrator.write_json_export diff --git a/spec/lib/mongo_to_mysql_spec.rb b/spec/lib/data_conversion/export_from_mongo_spec.rb similarity index 74% rename from spec/lib/mongo_to_mysql_spec.rb rename to spec/lib/data_conversion/export_from_mongo_spec.rb index 8ecc73c8401ded19c43f581aa0d48425884cbddb..655c72d525ab2a6d00714740281895c7125f894f 100644 --- a/spec/lib/mongo_to_mysql_spec.rb +++ b/spec/lib/data_conversion/export_from_mongo_spec.rb @@ -1,8 +1,13 @@ +# Copyright (c) 2010, Diaspora Inc. This file is +# licensed under the Affero General Public License version 3 or later. See +# the COPYRIGHT file. + require 'spec_helper' -require 'lib/mongo_to_mysql' -describe MongoToMysql do +Dir.glob(File.join(Rails.root, 'lib', 'data_conversion', '*.rb')).each { |f| require f } + +describe DataConversion::ExportFromMongo do before do - @migrator = MongoToMysql.new + @migrator = DataConversion::ExportFromMongo.new end describe '#sed_replace' do before do