Add annual reports for accounts (#28693)

th-downstream
Eugen Rochko 10 months ago committed by GitHub
parent 01ce9df880
commit 5b1eb09d54
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194

@ -0,0 +1,30 @@
# frozen_string_literal: true
class Api::V1::AnnualReportsController < Api::BaseController
before_action -> { doorkeeper_authorize! :read, :'read:accounts' }, only: :index
before_action -> { doorkeeper_authorize! :write, :'write:accounts' }, except: :index
before_action :require_user!
before_action :set_annual_report, except: :index
def index
with_read_replica do
@presenter = AnnualReportsPresenter.new(GeneratedAnnualReport.where(account_id: current_account.id).pending)
@relationships = StatusRelationshipsPresenter.new(@presenter.statuses, current_account.id)
end
render json: @presenter,
serializer: REST::AnnualReportsSerializer,
relationships: @relationships
end
def read
@annual_report.view!
render_empty
end
private
def set_annual_report
@annual_report = GeneratedAnnualReport.find_by!(account_id: current_account.id, year: params[:id])
end
end

@ -0,0 +1,43 @@
# frozen_string_literal: true
class AnnualReport
include DatabaseHelper
SOURCES = [
AnnualReport::Archetype,
AnnualReport::TypeDistribution,
AnnualReport::TopStatuses,
AnnualReport::MostUsedApps,
AnnualReport::CommonlyInteractedWithAccounts,
AnnualReport::TimeSeries,
AnnualReport::TopHashtags,
AnnualReport::MostRebloggedAccounts,
AnnualReport::Percentiles,
].freeze
SCHEMA = 1
def initialize(account, year)
@account = account
@year = year
end
def generate
return if GeneratedAnnualReport.exists?(account: @account, year: @year)
GeneratedAnnualReport.create(
account: @account,
year: @year,
schema_version: SCHEMA,
data: data
)
end
private
def data
with_read_replica do
SOURCES.each_with_object({}) { |klass, hsh| hsh.merge!(klass.new(@account, @year).generate) }
end
end
end

@ -0,0 +1,49 @@
# frozen_string_literal: true
class AnnualReport::Archetype < AnnualReport::Source
# Average number of posts (including replies and reblogs) made by
# each active user in a single year (2023)
AVERAGE_PER_YEAR = 113
def generate
{
archetype: archetype,
}
end
private
def archetype
if (standalone_count + replies_count + reblogs_count) < AVERAGE_PER_YEAR
:lurker
elsif reblogs_count > (standalone_count * 2)
:booster
elsif polls_count > (standalone_count * 0.1) # standalone_count includes posts with polls
:pollster
elsif replies_count > (standalone_count * 2)
:replier
else
:oracle
end
end
def polls_count
@polls_count ||= base_scope.where.not(poll_id: nil).count
end
def reblogs_count
@reblogs_count ||= base_scope.where.not(reblog_of_id: nil).count
end
def replies_count
@replies_count ||= base_scope.where.not(in_reply_to_id: nil).where.not(in_reply_to_account_id: @account.id).count
end
def standalone_count
@standalone_count ||= base_scope.without_replies.without_reblogs.count
end
def base_scope
@account.statuses.where(id: year_as_snowflake_range)
end
end

@ -0,0 +1,22 @@
# frozen_string_literal: true
class AnnualReport::CommonlyInteractedWithAccounts < AnnualReport::Source
SET_SIZE = 40
def generate
{
commonly_interacted_with_accounts: commonly_interacted_with_accounts.map do |(account_id, count)|
{
account_id: account_id,
count: count,
}
end,
}
end
private
def commonly_interacted_with_accounts
@account.statuses.reorder(nil).where(id: year_as_snowflake_range).where.not(in_reply_to_account_id: @account.id).group(:in_reply_to_account_id).having('count(*) > 1').order(total: :desc).limit(SET_SIZE).pluck(Arel.sql('in_reply_to_account_id, count(*) AS total'))
end
end

@ -0,0 +1,22 @@
# frozen_string_literal: true
class AnnualReport::MostRebloggedAccounts < AnnualReport::Source
SET_SIZE = 10
def generate
{
most_reblogged_accounts: most_reblogged_accounts.map do |(account_id, count)|
{
account_id: account_id,
count: count,
}
end,
}
end
private
def most_reblogged_accounts
@account.statuses.reorder(nil).where(id: year_as_snowflake_range).where.not(reblog_of_id: nil).joins(reblog: :account).group('accounts.id').having('count(*) > 1').order(total: :desc).limit(SET_SIZE).pluck(Arel.sql('accounts.id, count(*) as total'))
end
end

@ -0,0 +1,22 @@
# frozen_string_literal: true
class AnnualReport::MostUsedApps < AnnualReport::Source
SET_SIZE = 10
def generate
{
most_used_apps: most_used_apps.map do |(name, count)|
{
name: name,
count: count,
}
end,
}
end
private
def most_used_apps
@account.statuses.reorder(nil).where(id: year_as_snowflake_range).joins(:application).group('oauth_applications.name').order(total: :desc).limit(SET_SIZE).pluck(Arel.sql('oauth_applications.name, count(*) as total'))
end
end

@ -0,0 +1,62 @@
# frozen_string_literal: true
class AnnualReport::Percentiles < AnnualReport::Source
def generate
{
percentiles: {
followers: (total_with_fewer_followers / (total_with_any_followers + 1.0)) * 100,
statuses: (total_with_fewer_statuses / (total_with_any_statuses + 1.0)) * 100,
},
}
end
private
def followers_gained
@followers_gained ||= @account.passive_relationships.where("date_part('year', follows.created_at) = ?", @year).count
end
def statuses_created
@statuses_created ||= @account.statuses.where(id: year_as_snowflake_range).count
end
def total_with_fewer_followers
@total_with_fewer_followers ||= Follow.find_by_sql([<<~SQL.squish, { year: @year, comparison: followers_gained }]).first.total
WITH tmp0 AS (
SELECT follows.target_account_id
FROM follows
INNER JOIN accounts ON accounts.id = follows.target_account_id
WHERE date_part('year', follows.created_at) = :year
AND accounts.domain IS NULL
GROUP BY follows.target_account_id
HAVING COUNT(*) < :comparison
)
SELECT count(*) AS total
FROM tmp0
SQL
end
def total_with_fewer_statuses
@total_with_fewer_statuses ||= Status.find_by_sql([<<~SQL.squish, { comparison: statuses_created, min_id: year_as_snowflake_range.first, max_id: year_as_snowflake_range.last }]).first.total
WITH tmp0 AS (
SELECT statuses.account_id
FROM statuses
INNER JOIN accounts ON accounts.id = statuses.account_id
WHERE statuses.id BETWEEN :min_id AND :max_id
AND accounts.domain IS NULL
GROUP BY statuses.account_id
HAVING count(*) < :comparison
)
SELECT count(*) AS total
FROM tmp0
SQL
end
def total_with_any_followers
@total_with_any_followers ||= Follow.where("date_part('year', follows.created_at) = ?", @year).joins(:target_account).merge(Account.local).count('distinct follows.target_account_id')
end
def total_with_any_statuses
@total_with_any_statuses ||= Status.where(id: year_as_snowflake_range).joins(:account).merge(Account.local).count('distinct statuses.account_id')
end
end

@ -0,0 +1,16 @@
# frozen_string_literal: true
class AnnualReport::Source
attr_reader :account, :year
def initialize(account, year)
@account = account
@year = year
end
protected
def year_as_snowflake_range
(Mastodon::Snowflake.id_at(DateTime.new(year, 1, 1))..Mastodon::Snowflake.id_at(DateTime.new(year, 12, 31)))
end
end

@ -0,0 +1,30 @@
# frozen_string_literal: true
class AnnualReport::TimeSeries < AnnualReport::Source
def generate
{
time_series: (1..12).map do |month|
{
month: month,
statuses: statuses_per_month[month] || 0,
following: following_per_month[month] || 0,
followers: followers_per_month[month] || 0,
}
end,
}
end
private
def statuses_per_month
@statuses_per_month ||= @account.statuses.reorder(nil).where(id: year_as_snowflake_range).group(:period).pluck(Arel.sql("date_part('month', created_at)::int AS period, count(*)")).to_h
end
def following_per_month
@following_per_month ||= @account.active_relationships.where("date_part('year', created_at) = ?", @year).group(:period).pluck(Arel.sql("date_part('month', created_at)::int AS period, count(*)")).to_h
end
def followers_per_month
@followers_per_month ||= @account.passive_relationships.where("date_part('year', created_at) = ?", @year).group(:period).pluck(Arel.sql("date_part('month', created_at)::int AS period, count(*)")).to_h
end
end

@ -0,0 +1,22 @@
# frozen_string_literal: true
class AnnualReport::TopHashtags < AnnualReport::Source
SET_SIZE = 40
def generate
{
top_hashtags: top_hashtags.map do |(name, count)|
{
name: name,
count: count,
}
end,
}
end
private
def top_hashtags
Tag.joins(:statuses).where(statuses: { id: @account.statuses.where(id: year_as_snowflake_range).reorder(nil).select(:id) }).group(:id).having('count(*) > 1').order(total: :desc).limit(SET_SIZE).pluck(Arel.sql('COALESCE(tags.display_name, tags.name), count(*) AS total'))
end
end

@ -0,0 +1,21 @@
# frozen_string_literal: true
class AnnualReport::TopStatuses < AnnualReport::Source
def generate
top_reblogs = base_scope.order(reblogs_count: :desc).first&.id
top_favourites = base_scope.where.not(id: top_reblogs).order(favourites_count: :desc).first&.id
top_replies = base_scope.where.not(id: [top_reblogs, top_favourites]).order(replies_count: :desc).first&.id
{
top_statuses: {
by_reblogs: top_reblogs,
by_favourites: top_favourites,
by_replies: top_replies,
},
}
end
def base_scope
@account.statuses.with_public_visibility.joins(:status_stat).where(id: year_as_snowflake_range).reorder(nil)
end
end

@ -0,0 +1,20 @@
# frozen_string_literal: true
class AnnualReport::TypeDistribution < AnnualReport::Source
def generate
{
type_distribution: {
total: base_scope.count,
reblogs: base_scope.where.not(reblog_of_id: nil).count,
replies: base_scope.where.not(in_reply_to_id: nil).where.not(in_reply_to_account_id: @account.id).count,
standalone: base_scope.without_replies.without_reblogs.count,
},
}
end
private
def base_scope
@account.statuses.where(id: year_as_snowflake_range)
end
end

@ -0,0 +1,37 @@
# frozen_string_literal: true
# == Schema Information
#
# Table name: generated_annual_reports
#
# id :bigint(8) not null, primary key
# account_id :bigint(8) not null
# year :integer not null
# data :jsonb not null
# schema_version :integer not null
# viewed_at :datetime
# created_at :datetime not null
# updated_at :datetime not null
#
class GeneratedAnnualReport < ApplicationRecord
belongs_to :account
scope :pending, -> { where(viewed_at: nil) }
def viewed?
viewed_at.present?
end
def view!
update!(viewed_at: Time.now.utc)
end
def account_ids
data['most_reblogged_accounts'].pluck('account_id') + data['commonly_interacted_with_accounts'].pluck('account_id')
end
def status_ids
data['top_statuses'].values
end
end

@ -0,0 +1,23 @@
# frozen_string_literal: true
class AnnualReportsPresenter
alias read_attribute_for_serialization send
attr_reader :annual_reports
def initialize(annual_reports)
@annual_reports = annual_reports
end
def accounts
@accounts ||= Account.where(id: @annual_reports.flat_map(&:account_ids)).includes(:account_stat, :moved_to_account, user: :role)
end
def statuses
@statuses ||= Status.where(id: @annual_reports.flat_map(&:status_ids)).with_includes
end
def self.model_name
@model_name ||= ActiveModel::Name.new(self)
end
end

@ -0,0 +1,5 @@
# frozen_string_literal: true
class REST::AnnualReportSerializer < ActiveModel::Serializer
attributes :year, :data, :schema_version
end

@ -0,0 +1,7 @@
# frozen_string_literal: true
class REST::AnnualReportsSerializer < ActiveModel::Serializer
has_many :annual_reports, serializer: REST::AnnualReportSerializer
has_many :accounts, serializer: REST::AccountSerializer
has_many :statuses, serializer: REST::StatusSerializer
end

@ -0,0 +1,11 @@
# frozen_string_literal: true
class GenerateAnnualReportWorker
include Sidekiq::Worker
def perform(account_id, year)
AnnualReport.new(Account.find(account_id), year).generate
rescue ActiveRecord::RecordNotFound, ActiveRecord::RecordNotUnique
true
end
end

@ -24,6 +24,8 @@ class Scheduler::IndexingScheduler
end
end
private
def indexes
[AccountsIndex, TagsIndex, PublicStatusesIndex, StatusesIndex]
end

@ -51,6 +51,12 @@ namespace :api, format: false do
resources :scheduled_statuses, only: [:index, :show, :update, :destroy]
resources :preferences, only: [:index]
resources :annual_reports, only: [:index] do
member do
post :read
end
end
resources :announcements, only: [:index] do
scope module: :announcements do
resources :reactions, only: [:update, :destroy]

@ -0,0 +1,17 @@
# frozen_string_literal: true
class CreateGeneratedAnnualReports < ActiveRecord::Migration[7.1]
def change
create_table :generated_annual_reports do |t|
t.belongs_to :account, null: false, foreign_key: { on_cascade: :delete }, index: false
t.integer :year, null: false
t.jsonb :data, null: false
t.integer :schema_version, null: false
t.datetime :viewed_at
t.timestamps
end
add_index :generated_annual_reports, [:account_id, :year], unique: true
end
end

@ -10,7 +10,7 @@
#
# It's strongly recommended that you check this file into your version control system.
ActiveRecord::Schema[7.1].define(version: 2024_01_09_103012) do
ActiveRecord::Schema[7.1].define(version: 2024_01_11_033014) do
# These are extensions that must be enabled in order to support this database
enable_extension "plpgsql"
@ -516,6 +516,17 @@ ActiveRecord::Schema[7.1].define(version: 2024_01_09_103012) do
t.index ["target_account_id"], name: "index_follows_on_target_account_id"
end
create_table "generated_annual_reports", force: :cascade do |t|
t.bigint "account_id", null: false
t.integer "year", null: false
t.jsonb "data", null: false
t.integer "schema_version", null: false
t.datetime "viewed_at"
t.datetime "created_at", null: false
t.datetime "updated_at", null: false
t.index ["account_id", "year"], name: "index_generated_annual_reports_on_account_id_and_year", unique: true
end
create_table "identities", force: :cascade do |t|
t.string "provider", default: "", null: false
t.string "uid", default: "", null: false
@ -1226,6 +1237,7 @@ ActiveRecord::Schema[7.1].define(version: 2024_01_09_103012) do
add_foreign_key "follow_requests", "accounts", name: "fk_76d644b0e7", on_delete: :cascade
add_foreign_key "follows", "accounts", column: "target_account_id", name: "fk_745ca29eac", on_delete: :cascade
add_foreign_key "follows", "accounts", name: "fk_32ed1b5560", on_delete: :cascade
add_foreign_key "generated_annual_reports", "accounts"
add_foreign_key "identities", "users", name: "fk_bea040f377", on_delete: :cascade
add_foreign_key "imports", "accounts", name: "fk_6db1b6e408", on_delete: :cascade
add_foreign_key "invites", "users", on_delete: :cascade

Loading…
Cancel
Save