diff --git a/Changelog.md b/Changelog.md index 4b4407a6633d0aa3368cd00e924501f3d3e800ef..2a49237a89e93cfce9d433703832a2661c0aa0be 100644 --- a/Changelog.md +++ b/Changelog.md @@ -75,6 +75,7 @@ This is disabled by default since it requires the installation of additional pac * Convert remaining SASS stylesheets to SCSS [#5342](https://github.com/diaspora/diaspora/pull/5342) * Update rack-protection [#5403](https://github.com/diaspora/diaspora/pull/5403) * Cleanup diaspora.yml [#5426](https://github.com/diaspora/diaspora/pull/5426) +* Replace `opengraph_parser` with `open_graph_reader` [#5462](https://github.com/diaspora/diaspora/pull/5462) ## Bug fixes * orca cannot see 'Add Contact' button [#5158](https://github.com/diaspora/diaspora/pull/5158) diff --git a/Gemfile b/Gemfile index 101d789def28b558fd64fd576bc94069ad9f49ca..88e1d2e7de4721356285ad2eef4388329aaf184a 100644 --- a/Gemfile +++ b/Gemfile @@ -110,12 +110,12 @@ gem 'messagebus_ruby_api', '1.0.3' # Parsing -gem 'nokogiri', '1.6.4.1' -gem 'redcarpet', '3.2.0' -gem 'twitter-text', '1.10.0' -gem 'roxml', '3.1.6' -gem 'ruby-oembed', '0.8.11' -gem 'opengraph_parser', '0.2.3' +gem 'nokogiri', '1.6.4.1' +gem 'redcarpet', '3.2.0' +gem 'twitter-text', '1.10.0' +gem 'roxml', '3.1.6' +gem 'ruby-oembed', '0.8.11' +gem 'open_graph_reader', '0.3.1' # Services @@ -140,6 +140,7 @@ gem 'acts-as-taggable-on', '3.4.2' gem 'addressable', '2.3.6', :require => 'addressable/uri' gem 'faraday', '0.9.0' gem 'faraday_middleware', '0.9.1' +gem 'faraday-cookie_jar', '0.0.6' gem 'typhoeus', '0.6.9' # Views diff --git a/Gemfile.lock b/Gemfile.lock index 3dc8c4732310cbb45fb60fceda879b6e45942171..4d1476c3639a457be72fa9edaef8425ca3d88313 100644 --- a/Gemfile.lock +++ b/Gemfile.lock @@ -141,6 +141,8 @@ GEM http_parser.rb (~> 0.6) nokogiri (~> 1.6) diff-lcs (1.2.5) + domain_name (0.5.22) + unf (>= 0.0.5, < 1.0.0) eco (1.0.0) coffee-script eco-source @@ -165,6 +167,9 @@ GEM railties (>= 3.0.0) faraday (0.9.0) multipart-post (>= 1.2, < 3) + faraday-cookie_jar (0.0.6) + faraday (>= 0.7.4) + http-cookie (~> 1.0.0) faraday_middleware (0.9.1) faraday (>= 0.7.4, < 0.10) ffi (1.9.6) @@ -268,6 +273,8 @@ GEM hike (1.2.3) hiredis (0.5.2) hitimes (1.2.2) + http-cookie (1.0.2) + domain_name (~> 0.5) http_accept_language (2.0.2) http_parser.rb (0.6.0) i18n (0.6.11) @@ -361,9 +368,9 @@ GEM omniauth-oauth (~> 1.0) omniauth-wordpress (0.2.1) omniauth-oauth2 (~> 1.1.0) - opengraph_parser (0.2.3) - addressable - nokogiri + open_graph_reader (0.3.1) + faraday (~> 0.9.0) + nokogiri (~> 1.6) opennebula (4.10.1) json nokogiri @@ -618,6 +625,7 @@ DEPENDENCIES entypo-rails (= 2.2.2) factory_girl_rails (= 4.5.0) faraday (= 0.9.0) + faraday-cookie_jar (= 0.0.6) faraday_middleware (= 0.9.1) fixture_builder (= 0.3.6) fog (= 1.25.0) @@ -650,7 +658,7 @@ DEPENDENCIES omniauth-tumblr (= 1.1) omniauth-twitter (= 1.0.1) omniauth-wordpress (= 0.2.1) - opengraph_parser (= 0.2.3) + open_graph_reader (= 0.3.1) pry pry-byebug pry-debundle diff --git a/app/models/open_graph_cache.rb b/app/models/open_graph_cache.rb index 072d30a989405e9152aeef390ba2b43e50e99c2e..b1d357bbf3400b9d32102db5c86501ec9863998d 100644 --- a/app/models/open_graph_cache.rb +++ b/app/models/open_graph_cache.rb @@ -30,16 +30,17 @@ class OpenGraphCache < ActiveRecord::Base end def fetch_and_save_opengraph_data! - response = OpenGraph.new(self.url) + object = OpenGraphReader.fetch!(self.url) - return if response.blank? || response.type.blank? + return unless object - self.title = response.title.truncate(255) - self.ob_type = response.type - self.image = response.images[0] - self.url = response.url - self.description = response.description + self.title = object.og.title.truncate(255) + self.ob_type = object.og.type + self.image = object.og.image.url + self.url = object.og.url + self.description = object.og.description self.save + rescue OpenGraphReader::NoOpenGraphDataError end end diff --git a/config/initializers/faraday.rb b/config/initializers/faraday.rb index d22e92e35893481e806af0719fb5340907095da6..a1f57bb215971a9963331e9ab5e517ae37c75b78 100644 --- a/config/initializers/faraday.rb +++ b/config/initializers/faraday.rb @@ -18,6 +18,7 @@ options = { } Faraday.default_connection = Faraday::Connection.new(options) do |b| - b.use FaradayMiddleware::FollowRedirects + b.use FaradayMiddleware::FollowRedirects, limit: 8 + b.use :cookie_jar b.adapter Faraday.default_adapter end diff --git a/config/initializers/open_graph_reader.rb b/config/initializers/open_graph_reader.rb new file mode 100644 index 0000000000000000000000000000000000000000..68af98f12f399b2fd60f5475db4787da55b3f2ed --- /dev/null +++ b/config/initializers/open_graph_reader.rb @@ -0,0 +1,4 @@ +OpenGraphReader.configure do |config| + config.synthesize_title = true + config.synthesize_image_url = true +end diff --git a/lib/diaspora/message_renderer.rb b/lib/diaspora/message_renderer.rb index f0eb11bd8ed9caa636dbf1d2646e87f537f2c423..820da2a516d2fc276b73ebdc46d62ed912c4cde2 100644 --- a/lib/diaspora/message_renderer.rb +++ b/lib/diaspora/message_renderer.rb @@ -228,7 +228,7 @@ module Diaspora # Extracts all the urls from the raw message and return them in the form of a string # Different URLs are seperated with a space def urls - @urls ||= Twitter::Extractor.extract_urls(@raw_message) + @urls ||= Twitter::Extractor.extract_urls(plain_text_without_markdown) end def raw diff --git a/spec/lib/diaspora/message_renderer_spec.rb b/spec/lib/diaspora/message_renderer_spec.rb index 129415d631ae39f52040935acd24e6e4112bc6e2..fc73e5dffee87615e824347b9b101b6ea3084c7b 100644 --- a/spec/lib/diaspora/message_renderer_spec.rb +++ b/spec/lib/diaspora/message_renderer_spec.rb @@ -178,5 +178,10 @@ describe Diaspora::MessageRenderer do text = "[Perdu](http://perdu.com/) and [DuckDuckGo](https://duckduckgo.com/) can help you" expect(message(text).urls).to eql ["http://perdu.com/", "https://duckduckgo.com/"] end + + it "extracts urls from continous markdown correctly" do + text = "[](https://www.antifainfoblatt.de/artikel/%E2%80%9Eschlie%C3%9Flich-waren-es-zu-viele%E2%80%9C)" + expect(message(text).urls).to eq ["https://www.antifainfoblatt.de/sites/default/files/public/styles/front_full/public/jockpalfreeman.png?itok=OPjHKpmt", "https://www.antifainfoblatt.de/artikel/%E2%80%9Eschlie%C3%9Flich-waren-es-zu-viele%E2%80%9C"] + end end end diff --git a/spec/workers/gather_open_graph_data_spec.rb b/spec/workers/gather_open_graph_data_spec.rb index 3416047c426246a3b330fe441b9e2dafc1810362..ac7c4eecd6c6b4de21d49bf911342ac1e49ceba3 100644 --- a/spec/workers/gather_open_graph_data_spec.rb +++ b/spec/workers/gather_open_graph_data_spec.rb @@ -3,7 +3,7 @@ describe Workers::GatherOpenGraphData do before do @ogsite_title = 'Homepage' @ogsite_type = 'website' - @ogsite_image = '/img/something.png' + @ogsite_image = 'http://www.we-support-open-graph.com/img/something.png' @ogsite_url = 'http://www.we-support-open-graph.com' @ogsite_description = 'Homepage' @@ -31,9 +31,12 @@ describe Workers::GatherOpenGraphData do @status_message = FactoryGirl.create(:status_message) - stub_request(:get, @ogsite_url).to_return(:status => 200, :body => @ogsite_body) - stub_request(:get, @no_open_graph_url).to_return(:status => 200, :body => '<html><body>hello there</body></html>') - stub_request(:get, @oglong_url).to_return(:status => 200, :body => @oglong_body) + stub_request(:head, @ogsite_url).to_return(status: 200, body: "", headers: {'Content-Type' => 'text/html; utf-8'}) + stub_request(:get, @ogsite_url).to_return(status: 200, body: @ogsite_body, headers: {'Content-Type' => 'text/html; utf-8'}) + stub_request(:head, @no_open_graph_url).to_return(status: 200, body: "", headers: {'Content-Type' => 'text/html; utf-8'}) + stub_request(:get, @no_open_graph_url).to_return(:status => 200, :body => '<html><head><title>Hi</title><body>hello there</body></html>', headers: {'Content-Type' => 'text/html; utf-8'}) + stub_request(:head, @oglong_url).to_return(status: 200, body: "", headers: {'Content-Type' => 'text/html; utf-8'}) + stub_request(:get, @oglong_url).to_return(status: 200, body: @oglong_body, headers: {'Content-Type' => 'text/html; utf-8'}) end @@ -59,7 +62,7 @@ describe Workers::GatherOpenGraphData do expect(ogc.title).to eq(@ogsite_title) expect(ogc.ob_type).to eq(@ogsite_type) - expect(ogc.image).to eq(@ogsite_url + @ogsite_image) + expect(ogc.image).to eq(@ogsite_image) expect(ogc.url).to eq(@ogsite_url) expect(ogc.description).to eq(@ogsite_description)