diff --git a/Gemfile b/Gemfile
index c7bb0be6ea50732c909869192656110df389aedc..d08a2d3a1855f80b51033a36df014e95cce49c78 100644
--- a/Gemfile
+++ b/Gemfile
@@ -77,7 +77,12 @@ gem 'SystemTimer', '1.2.1', :platforms => :ruby_18
 gem 'hoptoad_notifier'
 gem 'newrelic_rpm', :require => false
 
+# statistics
+
+gem 'statsample', :require => false
+
 #mail
+
 gem 'messagebus_ruby_api', '0.4.8'
 
 # tags
diff --git a/Gemfile.lock b/Gemfile.lock
index 68bab41628c34f798e70f2d6175f6229f316f42f..8a33b662c7941e11779f6b56faf20151fff8656f 100644
--- a/Gemfile.lock
+++ b/Gemfile.lock
@@ -139,6 +139,7 @@ GEM
       uuidtools
     childprocess (0.2.2)
       ffi (~> 1.0.6)
+    clbustos-rtf (0.4.2)
     closure-compiler (1.1.4)
     cloudfiles (1.4.10)
       mime-types (>= 1.16)
@@ -163,12 +164,15 @@ GEM
       devise (~> 1.3.1)
       rails (<= 3.2, >= 3.0.0)
     diff-lcs (1.1.3)
+    dirty-memoize (0.0.4)
+    distribution (0.6.0)
     em-synchrony (0.2.0)
       eventmachine (>= 0.12.9)
     erubis (2.6.6)
       abstract (>= 1.0.0)
     eventmachine (0.12.10)
     excon (0.2.4)
+    extendmatrix (0.3.1)
     extlib (0.9.15)
     factory_girl (2.1.2)
       activesupport
@@ -249,6 +253,8 @@ GEM
     mime-types (1.16)
     mini_magick (3.2)
       subexec (~> 0.0.4)
+    minimization (0.2.1)
+      text-table (~> 1.2)
     mixlib-authentication (1.1.4)
       mixlib-log
     mixlib-cli (1.2.2)
@@ -331,6 +337,16 @@ GEM
     parallel_tests (0.6.7)
       parallel
     polyglot (0.3.2)
+    prawn (0.8.4)
+      prawn-core (< 0.9, >= 0.8.4)
+      prawn-layout (< 0.9, >= 0.8.4)
+      prawn-security (< 0.9, >= 0.8.4)
+    prawn-core (0.8.4)
+    prawn-layout (0.8.4)
+    prawn-security (0.8.4)
+    prawn-svg (0.9.1.10)
+      prawn (>= 0.8.4)
+      prawn-core (>= 0.8.4)
     pyu-ruby-sasl (0.0.3.3)
     rack (1.2.4)
     rack-mobile-detect (0.3.0)
@@ -366,6 +382,11 @@ GEM
     redis (2.2.2)
     redis-namespace (0.8.0)
       redis (< 3.0.0)
+    reportbuilder (1.4.1)
+      clbustos-rtf (~> 0.4.0)
+      prawn (~> 0.8.4)
+      prawn-svg (~> 0.9.1)
+      text-table (~> 1.2)
     resque (1.10.0)
       json (~> 1.4.6)
       redis-namespace (~> 0.8.0)
@@ -378,6 +399,7 @@ GEM
       resque (~> 1.0)
     rest-client (1.6.1)
       mime-types (>= 1.16)
+    rserve-client (0.2.5)
     rspec (2.6.0)
       rspec-core (~> 2.6.0)
       rspec-expectations (~> 2.6.0)
@@ -406,6 +428,7 @@ GEM
       linecache19 (>= 0.5.11)
       ruby-debug-base19 (>= 0.11.19)
     ruby-hmac (0.4.0)
+    ruby-ole (1.2.11.2)
     ruby-openid (2.1.8)
     ruby-openid-apps-discovery (1.2.0)
       ruby-openid (>= 2.1.7)
@@ -413,6 +436,7 @@ GEM
     ruby_core_source (0.1.5)
       archive-tar-minitar (>= 0.5.2)
     rubyntlm (0.1.1)
+    rubyvis (0.4.1)
     rubyzip (0.9.4)
     sass (3.1.7)
     selenium-webdriver (2.7.0)
@@ -425,10 +449,26 @@ GEM
     sinatra (1.2.7)
       rack (~> 1.1)
       tilt (>= 1.2.2, < 2.0)
+    spreadsheet (0.6.5.9)
+      ruby-ole (>= 1.0)
     sqlite3 (1.3.4)
+    statsample (1.1.0)
+      dirty-memoize (~> 0.0)
+      distribution (~> 0.3)
+      extendmatrix (~> 0.3.1)
+      fastercsv (> 0)
+      minimization (~> 0.2.0)
+      reportbuilder (~> 1.4)
+      rserve-client (~> 0.2.5)
+      rubyvis (~> 0.4.0)
+      spreadsheet (~> 0.6.5)
+      statsample-bivariate-extension (> 0)
+    statsample-bivariate-extension (1.1.0)
+      distribution (~> 0.6)
     subexec (0.0.4)
     systemu (2.4.0)
     term-ansicolor (1.0.6)
+    text-table (1.2.2)
     thin (1.2.11)
       daemons (>= 1.0.9)
       eventmachine (>= 0.12.6)
@@ -537,6 +577,7 @@ DEPENDENCIES
   settingslogic (= 2.0.6)
   sod!
   sqlite3
+  statsample
   thin (= 1.2.11)
   twitter (= 1.5.0)
   typhoeus
diff --git a/app/controllers/admins_controller.rb b/app/controllers/admins_controller.rb
index ad11611736f5b0d8fa0a99c5d987012777a1da11..b095dff478e1348ce268c86d4790bd53fc2ccff7 100644
--- a/app/controllers/admins_controller.rb
+++ b/app/controllers/admins_controller.rb
@@ -65,6 +65,11 @@ class AdminsController < ApplicationController
 
   end
 
+  # Exposes Statistics#generate_correlations to the admin correlations view.
+  def correlations
+    @correlations_hash = Statistics.new.generate_correlations
+  end
+
   private
   def percent_change(today, yesterday)
     sprintf( "%0.02f", ((today-yesterday) / yesterday.to_f)*100).to_f
diff --git a/app/views/admins/_admin_bar.haml b/app/views/admins/_admin_bar.haml
index 5f261d9e0b792f03d1045e4b4805f07ccef55a7f..a366c187cc632e5acd280f35fe7adb0a1bc6efc8 100644
--- a/app/views/admins/_admin_bar.haml
+++ b/app/views/admins/_admin_bar.haml
@@ -5,6 +5,7 @@
     %li= link_to 'User Search', user_search_path
     %li= link_to 'Weekly User Stats', weekly_user_stats_path
     %li= link_to 'Pod Stats', pod_stats_path
+    %li= link_to 'Correlations', correlations_path
     - if AppConfig[:mount_resque_web]
       %li= link_to 'Resque Overview', resque_web_path
 
diff --git a/app/views/admins/correlations.haml b/app/views/admins/correlations.haml
new file mode 100644
index 0000000000000000000000000000000000000000..499b27bbe64f7e05ae952f81756ac06359546009
--- /dev/null
+++ b/app/views/admins/correlations.haml
@@ -0,0 +1,13 @@
+
+.span-24
+  = render :partial => 'admins/admin_bar.haml'
+%br
+%br
+
+.span-24.last
+  %h1
+    = "Correlations with Sign In Count:"
+  %ul
+    - @correlations_hash.each do |metric, correlation|
+      %li
+        = "#{metric}, #{correlation}"
diff --git a/config/routes.rb b/config/routes.rb
index fd90b25c7c42406e643ddc7b8cb43f169131511d..ed28dd963ad31188dac794e5d4c8d34248f4aeb2 100644
--- a/config/routes.rb
+++ b/config/routes.rb
@@ -97,6 +97,7 @@ Diaspora::Application.routes.draw do
     match :user_search
     get :admin_inviter
     get :weekly_user_stats
+    get :correlations
     get :stats, :as => 'pod_stats'
end diff --git a/lib/statistics.rb b/lib/statistics.rb new file mode 100644 index 0000000000000000000000000000000000000000..d8d25d10f8286448016c5d2abcdee36e1a910258 --- /dev/null +++ b/lib/statistics.rb @@ -0,0 +1,135 @@ +require 'statsample' + +class Statistics + + attr_reader :start_time, + :range + + def initialize + #@start_time = start_time + #@range = range + end + + def posts_count_sql + <<SQL + SELECT users.id AS id, count(posts.id) AS count + FROM users + JOIN people ON people.owner_id = users.id + LEFT OUTER JOIN posts ON people.id = posts.author_id + #{self.where_clause_sql} + GROUP BY users.id +SQL + end + + def invites_sent_count_sql + <<SQL + SELECT users.id AS id, count(invitations.id) AS count + FROM users + LEFT OUTER JOIN invitations ON users.id = invitations.sender_id + #{self.where_clause_sql} + GROUP BY users.id +SQL + end + + def tags_followed_count_sql + <<SQL + SELECT users.id AS id, count(tag_followings.id) AS count + FROM users + LEFT OUTER JOIN tag_followings on users.id = tag_followings.user_id + #{self.where_clause_sql} + GROUP BY users.id +SQL + end + + def mentions_count_sql + <<SQL + SELECT users.id AS id, count(mentions.id) AS count + FROM users + JOIN people on users.id = people.owner_id + LEFT OUTER JOIN mentions on people.id = mentions.person_id + #{self.where_clause_sql} + GROUP BY users.id +SQL + end + + def contacts_sharing_with_count_sql + <<SQL + SELECT users.id AS id, count(contacts.id) AS count + FROM users + JOIN contacts on contacts.user_id = users.id + JOIN aspect_memberships on aspect_memberships.contact_id = contacts.id + #{self.where_clause_sql} + GROUP BY users.id +SQL + end + + def sign_in_count_sql + <<SQL + SELECT users.id AS id, users.sign_in_count AS count + FROM users + #{self.where_clause_sql} +SQL + end + + def correlate(first_metric, second_metric) + + # [{"id" => 1 , "count" => 123}] + + x_array = [] + y_array = [] + + self.result_hash(first_metric).keys.each do |k| + if val = 
self.result_hash(second_metric)[k] + x_array << self.result_hash(first_metric)[k] + y_array << val + end + end + + correlation(x_array, y_array) + end + + def generate_correlations + result = {} + [:posts_count, :invites_sent_count, :tags_followed_count, + :mentions_count, :contacts_sharing_with_count].each do |metric| + result[metric] = self.correlate(metric,:sign_in_count) + end + result + end + + + def correlation(x_array, y_array) + x = x_array.to_scale + y = y_array.to_scale + pearson = Statsample::Bivariate::Pearson.new(x,y) + pearson.r + end + + ### % of cohort came back last week + def retention(n) + week_created(n).where("current_sign_in_at > ?", Time.now - 1.week).count.to_f/week_created(n).count + end + + protected + def where_clause_sql + "where users.created_at > FROM_UNIXTIME(#{(Time.now - 1.month).to_i})" + end + + def week_created(n) + User.where("username IS NOT NULL").where("created_at > ? and created_at < ?", Time.now - (n+1).weeks, Time.now - n.weeks) + end + + #@param [Symbol] input type + #@returns [Hash] of resulting query + def result_hash(type) + instance_hash = self.instance_variable_get("@#{type.to_s}_hash".to_sym) + unless instance_hash + post_count_array = User.connection.select_all(self.send("#{type.to_s}_sql".to_sym)) + + instance_hash = {} + post_count_array.each{ |h| instance_hash[h['id']] = h["count"]} + self.instance_variable_set("@#{type.to_s}_hash".to_sym, instance_hash) + end + instance_hash + end +end diff --git a/spec/lib/statistics_spec.rb b/spec/lib/statistics_spec.rb new file mode 100644 index 0000000000000000000000000000000000000000..f2144bbe539eec7bdb6e4986be5505e2a11cea12 --- /dev/null +++ b/spec/lib/statistics_spec.rb @@ -0,0 +1,146 @@ +require 'spec_helper' +require 'lib/statistics' + +describe Statistics do + + before do + @time = Time.now + @stats = Statistics.new#(@time, @time - 1.week) + @result = [{"id" => alice.id , "count" => 0 }, + {"id" => bob.id , "count" => 1 }, + {"id" => eve.id , "count" => 0 }, + {"id" 
=> local_luke.id , "count" => 0 }, + {"id" => local_leia.id , "count" => 0 }] + end + + describe '#posts_count_sql' do + it "pulls back an array of post counts and ids" do + Factory.create(:status_message, :author => bob.person) + User.connection.select_all(@stats.posts_count_sql).should =~ @result + end + end + + describe '#invites_sent_count_sql' do + it "pulls back an array of invite counts and ids" do + Invitation.batch_invite(["a@a.com"], :sender => bob, :aspect => bob.aspects.first, :service => 'email') + User.connection.select_all(@stats.invites_sent_count_sql).should =~ @result + end + end + + describe '#tags_followed_count_sql' do + it "pulls back an array of tag following counts and ids" do + TagFollowing.create!(:user => bob, :tag_id => 1) + User.connection.select_all(@stats.tags_followed_count_sql).should =~ @result + end + end + + describe '#mentions_count_sql' do + it "pulls back an array of mentions following counts and ids" do + post = Factory.create(:status_message, :author => bob.person) + Mention.create(:post => post, :person => bob.person) + User.connection.select_all(@stats.mentions_count_sql).should =~ @result + end + end + + describe '#contacts_sharing_with_count_sql' do + it "pulls back an array of mentions following counts and ids" do + # bob is sharing with alice and eve in the spec setup + alice.share_with(eve.person, alice.aspects.first) + @result = [{"id" => alice.id , "count" => 2 }, + {"id" => bob.id , "count" => 2 }, + {"id" => eve.id , "count" => 1 }, + {"id" => local_luke.id , "count" => 2 }, + {"id" => local_leia.id , "count" => 2 }] + + User.connection.select_all(@stats.contacts_sharing_with_count_sql).should =~ @result + end + end + + describe '#sign_in_count_sql' do + it "pulls back an array of sign_in_counts and ids" do + bob.sign_in_count = 1 + bob.save! 
+ User.connection.select_all(@stats.sign_in_count_sql).should =~ @result + end + end + + ["posts_count", "invites_sent_count", "tags_followed_count", + "mentions_count", "sign_in_count", "contacts_sharing_with_count" ].each do |method| + + it "#{method}_sql calls where_sql" do + @stats.should_receive(:where_clause_sql) + + @stats.send("#{method}_sql".to_sym) + end + + if method != "sign_in_count" + it "#generate_correlations calss correlate with #{method} and sign_in_count" do + @stats.stub(:correlate).and_return(0.5) + @stats.should_receive(:correlate).with(method.to_sym,:sign_in_count).and_return(0.75) + @stats.generate_correlations + end + end + end + + describe "#correlation" do + it 'returns the correlation coefficient' do + @stats.correlation([1,2],[1,2]).to_s.should == 1.0.to_s + @stats.correlation([1,2,1,2],[1,1,2,2]).to_s.should == 0.0.to_s + end + end + describe "#generate_correlations" do + + it 'it returns a hash of including start and end time' do + pending + hash = @stats.correlation_hash + hash[:start_time].should == @time + hash[:end_time].should == @time - 1.week + end + + it 'returns the post count (and sign_in_count) correlation' do + bob.sign_in_count = 1 + bob.post(:status_message, :text => "here is a message") + bob.save! 
+ + @stats.generate_correlations[:posts_count].to_s.should == "1.0" + end + end + + describe "#correlate" do + it 'calls correlation with post' do + User.connection.should_receive(:select_all).and_return([{"id"=> 1, "count" => 7}, + {"id" => 2, "count" => 8}, + {"id" => 3, "count" => 9}], + [{"id"=> 1, "count" => 17}, + {"id" => 3, "count" => 19}] + ) + + @stats.should_receive(:correlation).with([7,9],[17,19]).and_return(0.5) + @stats.correlate(:posts_count,:sign_in_count).should == 0.5 + end + end + + + + + context 'todos' do + before do + pending + end + + # requires a threshold + + describe '#disabled_email_count_sql' do + end + + # binary things + describe '#completed_getting_started_count_sql' do + end + + describe 'used_cubbies_sql' do + end + + describe '.sign_up_method_sql' do + end + end +end