    #   Copyright (c) 2010, Diaspora Inc.  This file is
    #   licensed under the Affero General Public License version 3 or later.  See
    #   the COPYRIGHT file.
    
    require 'csv'
    require 'fileutils'
    require 'json'
    require 'shellwords'
    
    class MongoToMysql
    
      attr_accessor :start_time
    
      # Creates an exporter.
      #
      # start_time - reference point stored in @start_time; the log output
      #              prints the seconds elapsed since this value. Defaults to
      #              the time of construction.
      def initialize(start_time = Time.now)
        @start_time = start_time
      end
    
    
      # Returns a new Hash of CSV options: comma separated, auto-detected row
      # separator, double-quote quoting, no headers, no converters, and no
      # forced quoting.
      def csv_options
        {
          # delimiters and quoting
          :col_sep            => ",",
          :row_sep            => :auto,
          :quote_char         => '"',
          :force_quotes       => false,
          # limits and conversion
          :field_size_limit   => nil,
          :converters         => nil,
          :unconverted_fields => nil,
          # header handling
          :headers            => false,
          :return_headers     => false,
          :header_converters  => nil,
          :skip_blanks        => false
        }
      end
    
      # Relative path of the export directory (dirpath prefixes it with the
      # Rails root).
      def dirname
        File.join('tmp', 'export-for-mysql')
      end
    
      # Absolute path of the export directory: the Rails root joined to
      # dirname with a single "/".
      def dirpath
        [Rails.root, dirname].join('/')
      end
    
      # Removes the export directory (if present) and recreates it empty with
      # its "json" and "csv" subdirectories.
      #
      # Uses FileUtils instead of shelling out so that a path containing
      # spaces or shell metacharacters is never word-split by `rm -rf`.
      def clear_dir
        FileUtils.rm_rf(dirpath)
        %w[json csv].each do |subdir|
          FileUtils.mkdir_p(File.join(dirpath, subdir))
        end
      end
    
      # Name of the MongoDB database handed to `mongoexport -d` by
      # json_for_model.
      #
      # NOTE(review): the body of this method is empty in this copy of the
      # file, so it returns nil and the export command is built with a blank
      # database name. The original expression appears to have been lost;
      # restore it from the upstream source before running an export.
      def db_name
      end
    
      # Memoized list of collections to export, one Hash per collection with
      # its :name. Other methods add keys to these hashes as they go
      # (write_json_export stores :json_file, the converters store their
      # attribute lists).
      def models
        @models ||= [
          :aspects, :comments, :contacts, :invitations, :notifications,
          :people, :posts, :requests, :users
        ].map { |collection| {:name => collection} }
      end
    
      # sed command that rewrites every `{ "$oid" : "<id>" }` wrapper to the
      # bare quoted id captured by the pattern. The result is also stored in
      # @id_sed.
      def id_sed
        oid_wrapper = '{\ \"$oid\"\ :\ \(\"[^"]*\"\)\ }'
        @id_sed = sed_replace(oid_wrapper)
      end
    
      # sed command that rewrites every `{ "$date" : <digits> }` wrapper to the
      # bare digits captured by the pattern. The result is also stored in
      # @date_sed.
      def date_sed
        date_wrapper = '{\ \"$date\"\ :\ \([0-9]*\)\ }'
        @date_sed = sed_replace(date_wrapper)
      end
    
      # Builds a sed invocation that globally replaces each match of +regex+
      # with its first capture group.
      #
      # regex - sed pattern text; it is placed inside single quotes unescaped.
      def sed_replace(regex)
        format("sed 's/%s/\\1/g'", regex)
      end
    
      # Shell pipeline that exports one collection with mongoexport and pipes
      # the output through the id and date sed filters.
      #
      # model_name - collection name passed to `mongoexport -c`.
      def json_for_model(model_name)
        export_command = "mongoexport -d #{db_name} -c #{model_name}"
        [export_command, id_sed, date_sed].join(" | ")
      end
    
      # Runs the export pipeline for every model and writes each result to
      # <dirpath>/json/<name>.json, recording that path in the model hash under
      # :json_file so the converters can find it.
      def write_json_export
        log "Starting JSON export..."

        models.each do |model|
          name = model[:name]
          log "Starting #{name} JSON export..."

          filename = "#{dirpath}/json/#{name}.json"
          model[:json_file] = filename
          # Escape the redirect target so a path with spaces or shell
          # metacharacters is not split or expanded by the shell.
          `#{json_for_model(name)} > #{Shellwords.escape(filename)}`

          log "Completed #{name} JSON export to #{dirname}/json/#{name}.json."
        end

        log "JSON export complete."
      end
    
    Raphael's avatar
    Raphael a validé
        if ['development', 'production'].include?(Rails.env)
    
          puts "#{sprintf("%.2f", Time.now - start_time)}s #{string}"
    
    Raphael's avatar
    Raphael a validé
        end
        Rails.logger.debug(string) if Rails.logger
      end
    
      # Converts every exported model by dispatching to the method named
      # "<model name>_json_to_csv", passing it the model hash.
      def convert_json_files
        models.each do |model|
          converter = :"#{model[:name]}_json_to_csv"
          send(converter, model)
        end
      end
    
      # Converts one exported JSON file (one document per line) into
      # <dirpath>/csv/<name>.csv.
      #
      # model_hash - Hash with :name, :json_file (path of the export) and
      #              :attrs (the CSV header row).
      #
      # Yields each parsed document (a Hash) and writes the Array returned by
      # the block as that document's CSV row.
      #
      # Both files are opened in block form so they are closed even when
      # parsing or the caller's block raises.
      def generic_json_to_csv(model_hash)
        log "Converting #{model_hash[:name]} json to csv"

        csv_path = "#{dirpath}/csv/#{model_hash[:name]}.csv"

        File.open(model_hash[:json_file]) do |json_file|
          CSV.open(csv_path, 'w') do |csv|
            csv << model_hash[:attrs]

            json_file.each do |json_line|
              csv << yield(JSON.parse(json_line))
            end
          end
        end
      end
    
      # Writes the comments CSV.
      #
      # model_hash - the comments model hash; its :attrs key is set to the CSV
      #              header row before delegating to generic_json_to_csv.
      #
      # Each row holds the document's values for the Mongo keys listed in
      # mongo_attrs, in the same order as the header columns.
      def comments_json_to_csv(model_hash)
        model_hash[:attrs] = ["mongo_id", "post_mongo_id", "person_mongo_id", "diaspora_handle", "text", "youtube_titles"]

        generic_json_to_csv(model_hash) do |hash|
          mongo_attrs = ["_id", "post_id", "person_id", "diaspora_handle", "text", "youtube_titles"]
          hash.values_at(*mongo_attrs)
        end
      end
    
      # Writes the contacts CSV and, from each contact's "aspect_ids", the
      # aspect_memberships join CSV.
      #
      # model_hash - the contacts model hash; :main_attrs, :join_table_name and
      #              :join_table_attrs are set here before delegating to
      #              generic_json_to_two_csvs.
      def contacts_json_to_csv(model_hash)
        model_hash[:main_attrs] = ["mongo_id", "user_mongo_id", "person_mongo_id", "pending", "created_at", "updated_at"]
        # Aspect memberships
        model_hash[:join_table_name] = :aspect_memberships
        model_hash[:join_table_attrs] = ["contact_mongo_id", "aspect_mongo_id"]

        generic_json_to_two_csvs(model_hash) do |hash|
          main_mongo_attrs = ["_id", "user_id", "person_id", "pending", "created_at", "updated_at"]
          main_row = hash.values_at(*main_mongo_attrs)
          # Array() turns a missing "aspect_ids" key into "no memberships"
          # instead of raising NoMethodError on nil.
          aspect_membership_rows = Array(hash["aspect_ids"]).map { |id| [hash["_id"], id] }

          [main_row, aspect_membership_rows]
        end
      end
    
      # Writes the invitations CSV.
      #
      # model_hash - the invitations model hash; its :attrs key is set to the
      #              CSV header row before delegating to generic_json_to_csv.
      #
      # The Mongo keys "to_id", "from_id" and "into_id" fill the recipient,
      # sender and aspect columns respectively.
      def invitations_json_to_csv(model_hash)
        model_hash[:attrs] = ["mongo_id", "recipient_mongo_id", "sender_mongo_id", "aspect_mongo_id", "message"]

        generic_json_to_csv(model_hash) do |hash|
          mongo_attrs = ["_id", "to_id", "from_id", "into_id", "message"]
          hash.values_at(*mongo_attrs)
        end
      end
    
      # Writes the notifications CSV.
      #
      # model_hash - the notifications model hash; its :attrs key is set to the
      #              CSV header row before delegating to generic_json_to_csv.
      #
      # The Mongo "kind" key fills the "target_type" column.
      #
      # NOTE(review): the header uses "target_id" where the other converters
      # name id columns "*_mongo_id" -- confirm which name the importer expects.
      def notifications_json_to_csv(model_hash)
        model_hash[:attrs] = ["mongo_id", "target_id", "target_type", "unread"]

        generic_json_to_csv(model_hash) do |hash|
          mongo_attrs = ["_id", "target_id", "kind", "unread"]
          hash.values_at(*mongo_attrs)
        end
      end
    
      # Writes the people CSV and, from each person's embedded "profile"
      # document, the profiles CSV.
      #
      # model_hash - the people model hash (:name and :json_file are read);
      #              :attrs and :profile_attrs are set here to the two header
      #              rows.
      #
      # Fixes relative to the previous version:
      # * profiles.csv received the person row instead of the profile row.
      # * "mongo_id" was looked up under "id"; every other converter reads the
      #   document id from "_id".
      # * "person_mongo_id" was looked up inside the embedded profile; it is
      #   now taken from the owning person's "_id".
      # * All three files are opened in block form so they are closed even if
      #   a line fails to parse.
      def people_json_to_csv(model_hash)
        model_hash[:attrs] = ["created_at", "updated_at", "serialized_public_key", "url", "mongo_id", "owner_mongo_id", "diaspora_handle"]
        model_hash[:profile_attrs] = ["image_url_medium", "searchable", "image_url", "person_mongo_id", "gender", "diaspora_handle", "birthday", "last_name", "bio", "image_url_small", "first_name"]

        log "Converting #{model_hash[:name]} json to csv"

        people_path = "#{dirpath}/csv/#{model_hash[:name]}.csv"
        profiles_path = "#{dirpath}/csv/profiles.csv"

        File.open(model_hash[:json_file]) do |json_file|
          CSV.open(people_path, 'w') do |people_csv|
            CSV.open(profiles_path, 'w') do |profiles_csv|
              people_csv << model_hash[:attrs]
              profiles_csv << model_hash[:profile_attrs]

              json_file.each do |person_json|
                hash = JSON.parse(person_json)
                people_csv << person_csv_row(hash, model_hash[:attrs])
                profiles_csv << profile_csv_row(hash, model_hash[:profile_attrs])
              end
            end
          end
        end
      end

      # Builds one people.csv row: "mongo_id" reads "_id", and any other
      # "*mongo_*" column reads the key with "mongo_" removed.
      def person_csv_row(hash, attrs)
        attrs.map do |attr_name|
          key = attr_name == "mongo_id" ? "_id" : attr_name.gsub("mongo_", "")
          hash[key]
        end
      end

      # Builds one profiles.csv row from the embedded profile; the
      # "person_mongo_id" column is the owning person's "_id".
      def profile_csv_row(hash, profile_attrs)
        profile_attrs.map do |attr_name|
          attr_name == "person_mongo_id" ? hash["_id"] : hash["profile"][attr_name]
        end
      end
    
      # Writes the posts CSV.
      #
      # model_hash - the posts model hash; its :attrs key is set to the CSV
      #              header row before delegating to generic_json_to_csv.
      #
      # The Mongo keys "status_message_id", "_id", "_type" and "person_id" fill
      # the "status_message_mongo_id", "mongo_id", "type" and
      # "person_mongo_id" columns; the remaining keys keep their names.
      #
      # TODO: has to handle the polymorphic stuff
      def posts_json_to_csv(model_hash)
        model_hash[:attrs] = ["youtube_titles", "pending", "created_at", "public", "updated_at", "status_message_mongo_id", "caption", "remote_photo_path", "random_string", "image", "mongo_id", "type", "diaspora_handle", "person_mongo_id", "message"]

        generic_json_to_csv(model_hash) do |hash|
          mongo_attrs = ["youtube_titles", "pending", "created_at", "public", "updated_at", "status_message_id", "caption", "remote_photo_path", "random_string", "image", "_id", "_type", "diaspora_handle", "person_id", "message"]
          hash.values_at(*mongo_attrs)
        end
      end
    
      # Writes the requests CSV.
      #
      # model_hash - the requests model hash; its :attrs key is set to the CSV
      #              header row before delegating to generic_json_to_csv.
      #
      # The Mongo keys "to_id", "from_id" and "into_id" fill the recipient,
      # sender and aspect columns respectively.
      def requests_json_to_csv(model_hash)
        model_hash[:attrs] = ["mongo_id", "recipient_mongo_id", "sender_mongo_id", "aspect_mongo_id"]

        generic_json_to_csv(model_hash) do |hash|
          mongo_attrs = ["_id", "to_id", "from_id", "into_id"]
          hash.values_at(*mongo_attrs)
        end
      end
    
      # Writes the users CSV.
      #
      # model_hash - the users model hash; its :attrs key is set to the CSV
      #              header row before delegating to generic_json_to_csv.
      #
      # Only the document id is renamed ("_id" fills "mongo_id"); every other
      # column reads the Mongo key of the same name.
      def users_json_to_csv(model_hash)
        model_hash[:attrs] = ["mongo_id", "username", "serialized_private_key", "encrypted_password", "invites", "invitation_token", "invitation_sent_at", "getting_started", "disable_mail", "language", "last_sign_in_ip", "last_sign_in_at", "reset_password_token", "password_salt"]

        generic_json_to_csv(model_hash) do |hash|
          mongo_attrs = ["_id", "username", "serialized_private_key", "encrypted_password", "invites", "invitation_token", "invitation_sent_at", "getting_started", "disable_mail", "language", "last_sign_in_ip", "last_sign_in_at", "reset_password_token", "password_salt"]
          hash.values_at(*mongo_attrs)
        end
      end
    
      # Writes the aspects CSV and, from each aspect's "post_ids", the
      # post_visibilities join CSV.
      #
      # model_hash - the aspects model hash; :main_attrs, :join_table_name and
      #              :join_table_attrs are set here before delegating to
      #              generic_json_to_two_csvs.
      def aspects_json_to_csv(model_hash)
        log "Converting aspects json to aspects and post_visibilities csvs"

        model_hash[:main_attrs] = ["mongo_id", "name", "created_at", "updated_at"]
        # Post visibilities
        model_hash[:join_table_name] = :post_visibilities
        model_hash[:join_table_attrs] = ["aspect_mongo_id", "post_mongo_id"]

        generic_json_to_two_csvs(model_hash) do |hash|
          mongo_attrs = ["_id", "name", "created_at", "updated_at"]
          main_row = hash.values_at(*mongo_attrs)
          # Array() turns a missing "post_ids" key into "no visibilities"
          # instead of raising NoMethodError on nil.
          post_visibility_rows = Array(hash["post_ids"]).map { |id| [hash["_id"], id] }

          [main_row, post_visibility_rows]
        end
      end
    
      # Converts one exported JSON file (one document per line) into a main CSV
      # (<dirpath>/csv/<name>.csv) and a join-table CSV
      # (<dirpath>/csv/<join_table_name>.csv).
      #
      # model_hash - Hash with :name, :json_file, :main_attrs (main header
      #              row), :join_table_name and :join_table_attrs (join header
      #              row).
      #
      # Yields each parsed document (a Hash). The block must return an Array
      # whose first element is the main row and whose last element is an Array
      # of join-table rows.
      #
      # All three files are opened in block form so they are closed even when
      # parsing or the caller's block raises.
      def generic_json_to_two_csvs(model_hash)
        log "Converting #{model_hash[:name]} json to two csvs"

        main_path = "#{dirpath}/csv/#{model_hash[:name]}.csv"
        join_path = "#{dirpath}/csv/#{model_hash[:join_table_name]}.csv"

        File.open(model_hash[:json_file]) do |json_file|
          CSV.open(main_path, 'w') do |main_csv|
            CSV.open(join_path, 'w') do |join_csv|
              main_csv << model_hash[:main_attrs]
              join_csv << model_hash[:join_table_attrs]

              json_file.each do |json_line|
                result = yield(JSON.parse(json_line))
                main_csv << result.first
                result.last.each { |row| join_csv << row }
              end
            end
          end
        end
      end