Improve performance of follow recommendation scheduler (#16159)
Express follow_recommendations in terms of account_summaries rather than accounts, integrate filters that are unconditionally used, and materialize the resulting view. This should result in the bulk of the computation being performed only once instead of **once per recommendation language**.
This commit is contained in:
		
							parent
							
								
									5b255015f5
								
							
						
					
					
						commit
						78eddfc83c
					
				
					 5 changed files with 75 additions and 16 deletions
				
			
		|  | @ -14,9 +14,11 @@ class FollowRecommendation < ApplicationRecord | ||||||
|   belongs_to :account_summary, foreign_key: :account_id |   belongs_to :account_summary, foreign_key: :account_id | ||||||
|   belongs_to :account, foreign_key: :account_id |   belongs_to :account, foreign_key: :account_id | ||||||
| 
 | 
 | ||||||
|   scope :safe, -> { joins(:account_summary).merge(AccountSummary.safe) } |  | ||||||
|   scope :localized, ->(locale) { joins(:account_summary).merge(AccountSummary.localized(locale)) } |   scope :localized, ->(locale) { joins(:account_summary).merge(AccountSummary.localized(locale)) } | ||||||
|   scope :filtered, -> { joins(:account_summary).merge(AccountSummary.filtered) } | 
 | ||||||
|  |   def self.refresh | ||||||
|  |     Scenic.database.refresh_materialized_view(table_name, concurrently: true, cascade: false) | ||||||
|  |   end | ||||||
| 
 | 
 | ||||||
|   def readonly? |   def readonly? | ||||||
|     true |     true | ||||||
|  |  | ||||||
|  | @ -14,13 +14,14 @@ class Scheduler::FollowRecommendationsScheduler | ||||||
|   def perform |   def perform | ||||||
|     # Maintaining a materialized view speeds-up subsequent queries significantly |     # Maintaining a materialized view speeds-up subsequent queries significantly | ||||||
|     AccountSummary.refresh |     AccountSummary.refresh | ||||||
|  |     FollowRecommendation.refresh | ||||||
| 
 | 
 | ||||||
|     fallback_recommendations = FollowRecommendation.safe.filtered.limit(SET_SIZE).index_by(&:account_id) |     fallback_recommendations = FollowRecommendation.limit(SET_SIZE).index_by(&:account_id) | ||||||
| 
 | 
 | ||||||
|     I18n.available_locales.each do |locale| |     I18n.available_locales.each do |locale| | ||||||
|       recommendations = begin |       recommendations = begin | ||||||
|         if AccountSummary.safe.filtered.localized(locale).exists? # We can skip the work if no accounts with that language exist |         if AccountSummary.safe.filtered.localized(locale).exists? # We can skip the work if no accounts with that language exist | ||||||
|           FollowRecommendation.safe.filtered.localized(locale).limit(SET_SIZE).index_by(&:account_id) |           FollowRecommendation.localized(locale).limit(SET_SIZE).index_by(&:account_id) | ||||||
|         else |         else | ||||||
|           {} |           {} | ||||||
|         end |         end | ||||||
|  |  | ||||||
|  | @ -0,0 +1,18 @@ | ||||||
|  | class UpdateFollowRecommendationsToVersion2 < ActiveRecord::Migration[6.1] | ||||||
|  |   # We're switching from a normal to a materialized view so we need | ||||||
|  |   # custom `up` and `down` paths. | ||||||
|  | 
 | ||||||
|  |   def up | ||||||
|  |     drop_view :follow_recommendations | ||||||
|  |     create_view :follow_recommendations, version: 2, materialized: true | ||||||
|  | 
 | ||||||
|  |     # To be able to refresh the view concurrently, | ||||||
|  |     # at least one unique index is required | ||||||
|  |     safety_assured { add_index :follow_recommendations, :account_id, unique: true } | ||||||
|  |   end | ||||||
|  | 
 | ||||||
|  |   def down | ||||||
|  |     drop_view :follow_recommendations, materialized: true | ||||||
|  |     create_view :follow_recommendations, version: 1 | ||||||
|  |   end | ||||||
|  | end | ||||||
							
								
								
									
										28
									
								
								db/schema.rb
									
									
									
									
									
								
							
							
						
						
									
										28
									
								
								db/schema.rb
									
									
									
									
									
								
							|  | @ -10,7 +10,7 @@ | ||||||
| # | # | ||||||
| # It's strongly recommended that you check this file into your version control system. | # It's strongly recommended that you check this file into your version control system. | ||||||
| 
 | 
 | ||||||
| ActiveRecord::Schema.define(version: 2021_04_25_135952) do | ActiveRecord::Schema.define(version: 2021_05_05_174616) do | ||||||
| 
 | 
 | ||||||
|   # These are extensions that must be enabled in order to support this database |   # These are extensions that must be enabled in order to support this database | ||||||
|   enable_extension "plpgsql" |   enable_extension "plpgsql" | ||||||
|  | @ -1114,30 +1114,34 @@ ActiveRecord::Schema.define(version: 2021_04_25_135952) do | ||||||
|   SQL |   SQL | ||||||
|   add_index "account_summaries", ["account_id"], name: "index_account_summaries_on_account_id", unique: true |   add_index "account_summaries", ["account_id"], name: "index_account_summaries_on_account_id", unique: true | ||||||
| 
 | 
 | ||||||
|   create_view "follow_recommendations", sql_definition: <<-SQL |   create_view "follow_recommendations", materialized: true, sql_definition: <<-SQL | ||||||
|       SELECT t0.account_id, |       SELECT t0.account_id, | ||||||
|       sum(t0.rank) AS rank, |       sum(t0.rank) AS rank, | ||||||
|       array_agg(t0.reason) AS reason |       array_agg(t0.reason) AS reason | ||||||
|      FROM ( SELECT accounts.id AS account_id, |      FROM ( SELECT account_summaries.account_id, | ||||||
|               ((count(follows.id))::numeric / (1.0 + (count(follows.id))::numeric)) AS rank, |               ((count(follows.id))::numeric / (1.0 + (count(follows.id))::numeric)) AS rank, | ||||||
|               'most_followed'::text AS reason |               'most_followed'::text AS reason | ||||||
|              FROM ((follows |              FROM (((follows | ||||||
|                JOIN accounts ON ((accounts.id = follows.target_account_id))) |                JOIN account_summaries ON ((account_summaries.account_id = follows.target_account_id))) | ||||||
|                JOIN users ON ((users.account_id = follows.account_id))) |                JOIN users ON ((users.account_id = follows.account_id))) | ||||||
|             WHERE ((users.current_sign_in_at >= (now() - 'P30D'::interval)) AND (accounts.suspended_at IS NULL) AND (accounts.moved_to_account_id IS NULL) AND (accounts.silenced_at IS NULL) AND (accounts.locked = false) AND (accounts.discoverable = true)) |                LEFT JOIN follow_recommendation_suppressions ON ((follow_recommendation_suppressions.account_id = follows.target_account_id))) | ||||||
|             GROUP BY accounts.id |             WHERE ((users.current_sign_in_at >= (now() - 'P30D'::interval)) AND (account_summaries.sensitive = false) AND (follow_recommendation_suppressions.id IS NULL)) | ||||||
|  |             GROUP BY account_summaries.account_id | ||||||
|            HAVING (count(follows.id) >= 5) |            HAVING (count(follows.id) >= 5) | ||||||
|           UNION ALL |           UNION ALL | ||||||
|            SELECT accounts.id AS account_id, |            SELECT account_summaries.account_id, | ||||||
|               (sum((status_stats.reblogs_count + status_stats.favourites_count)) / (1.0 + sum((status_stats.reblogs_count + status_stats.favourites_count)))) AS rank, |               (sum((status_stats.reblogs_count + status_stats.favourites_count)) / (1.0 + sum((status_stats.reblogs_count + status_stats.favourites_count)))) AS rank, | ||||||
|               'most_interactions'::text AS reason |               'most_interactions'::text AS reason | ||||||
|              FROM ((status_stats |              FROM (((status_stats | ||||||
|                JOIN statuses ON ((statuses.id = status_stats.status_id))) |                JOIN statuses ON ((statuses.id = status_stats.status_id))) | ||||||
|                JOIN accounts ON ((accounts.id = statuses.account_id))) |                JOIN account_summaries ON ((account_summaries.account_id = statuses.account_id))) | ||||||
|             WHERE ((statuses.id >= (((date_part('epoch'::text, (now() - 'P30D'::interval)) * (1000)::double precision))::bigint << 16)) AND (accounts.suspended_at IS NULL) AND (accounts.moved_to_account_id IS NULL) AND (accounts.silenced_at IS NULL) AND (accounts.locked = false) AND (accounts.discoverable = true)) |                LEFT JOIN follow_recommendation_suppressions ON ((follow_recommendation_suppressions.account_id = statuses.account_id))) | ||||||
|             GROUP BY accounts.id |             WHERE ((statuses.id >= (((date_part('epoch'::text, (now() - 'P30D'::interval)) * (1000)::double precision))::bigint << 16)) AND (account_summaries.sensitive = false) AND (follow_recommendation_suppressions.id IS NULL)) | ||||||
|  |             GROUP BY account_summaries.account_id | ||||||
|            HAVING (sum((status_stats.reblogs_count + status_stats.favourites_count)) >= (5)::numeric)) t0 |            HAVING (sum((status_stats.reblogs_count + status_stats.favourites_count)) >= (5)::numeric)) t0 | ||||||
|     GROUP BY t0.account_id |     GROUP BY t0.account_id | ||||||
|     ORDER BY (sum(t0.rank)) DESC; |     ORDER BY (sum(t0.rank)) DESC; | ||||||
|   SQL |   SQL | ||||||
|  |   add_index "follow_recommendations", ["account_id"], name: "index_follow_recommendations_on_account_id", unique: true | ||||||
|  | 
 | ||||||
| end | end | ||||||
|  |  | ||||||
							
								
								
									
										34
									
								
								db/views/follow_recommendations_v02.sql
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										34
									
								
								db/views/follow_recommendations_v02.sql
									
									
									
									
									
										Normal file
									
								
							|  | @ -0,0 +1,34 @@ | ||||||
|  | SELECT | ||||||
|  |   account_id, | ||||||
|  |   sum(rank) AS rank, | ||||||
|  |   array_agg(reason) AS reason | ||||||
|  | FROM ( | ||||||
|  |   SELECT | ||||||
|  |     account_summaries.account_id AS account_id, | ||||||
|  |     count(follows.id) / (1.0 + count(follows.id)) AS rank, | ||||||
|  |     'most_followed' AS reason | ||||||
|  |   FROM follows | ||||||
|  |   INNER JOIN account_summaries ON account_summaries.account_id = follows.target_account_id | ||||||
|  |   INNER JOIN users ON users.account_id = follows.account_id | ||||||
|  |   LEFT OUTER JOIN follow_recommendation_suppressions ON follow_recommendation_suppressions.account_id = follows.target_account_id | ||||||
|  |   WHERE users.current_sign_in_at >= (now() - interval '30 days') | ||||||
|  |     AND account_summaries.sensitive = 'f' | ||||||
|  |     AND follow_recommendation_suppressions.id IS NULL | ||||||
|  |   GROUP BY account_summaries.account_id | ||||||
|  |   HAVING count(follows.id) >= 5 | ||||||
|  |   UNION ALL | ||||||
|  |   SELECT account_summaries.account_id AS account_id, | ||||||
|  |          sum(reblogs_count + favourites_count) / (1.0 + sum(reblogs_count + favourites_count)) AS rank, | ||||||
|  |          'most_interactions' AS reason | ||||||
|  |   FROM status_stats | ||||||
|  |   INNER JOIN statuses ON statuses.id = status_stats.status_id | ||||||
|  |   INNER JOIN account_summaries ON account_summaries.account_id = statuses.account_id | ||||||
|  |   LEFT OUTER JOIN follow_recommendation_suppressions ON follow_recommendation_suppressions.account_id = statuses.account_id | ||||||
|  |   WHERE statuses.id >= ((date_part('epoch', now() - interval '30 days') * 1000)::bigint << 16) | ||||||
|  |     AND account_summaries.sensitive = 'f' | ||||||
|  |     AND follow_recommendation_suppressions.id IS NULL | ||||||
|  |   GROUP BY account_summaries.account_id | ||||||
|  |   HAVING sum(reblogs_count + favourites_count) >= 5 | ||||||
|  | ) t0 | ||||||
|  | GROUP BY account_id | ||||||
|  | ORDER BY rank DESC | ||||||
		Loading…
	
		Reference in a new issue