channel: use YT API + extractors to fetch videos

This commit is contained in:
Samantaz Fox 2022-11-11 20:26:34 +01:00
parent c5ee2bfc0f
commit 2903e896ec
No known key found for this signature in database
GPG Key ID: F42821059186176E
5 changed files with 127 additions and 104 deletions

View File

@ -180,11 +180,16 @@ def fetch_channel(ucid, pull_all_videos : Bool)
LOGGER.trace("fetch_channel: #{ucid} : author = #{author}, auto_generated = #{auto_generated}") LOGGER.trace("fetch_channel: #{ucid} : author = #{author}, auto_generated = #{auto_generated}")
page = 1 channel = InvidiousChannel.new({
id: ucid,
author: author,
updated: Time.utc,
deleted: false,
subscribed: nil,
})
LOGGER.trace("fetch_channel: #{ucid} : Downloading channel videos page") LOGGER.trace("fetch_channel: #{ucid} : Downloading channel videos page")
initial_data = get_channel_videos_response(ucid, page, auto_generated: auto_generated) videos, continuation = IV::Channel::Tabs.get_videos(channel)
videos = extract_videos(initial_data, author, ucid)
LOGGER.trace("fetch_channel: #{ucid} : Extracting videos from channel RSS feed") LOGGER.trace("fetch_channel: #{ucid} : Extracting videos from channel RSS feed")
rss.xpath_nodes("//feed/entry").each do |entry| rss.xpath_nodes("//feed/entry").each do |entry|
@ -197,7 +202,9 @@ def fetch_channel(ucid, pull_all_videos : Bool)
views = entry.xpath_node("group/community/statistics").try &.["views"]?.try &.to_i64? views = entry.xpath_node("group/community/statistics").try &.["views"]?.try &.to_i64?
views ||= 0_i64 views ||= 0_i64
channel_video = videos.select { |video| video.id == video_id }[0]? channel_video = videos
.select(SearchVideo)
.select(&.id.== video_id)[0]?
length_seconds = channel_video.try &.length_seconds length_seconds = channel_video.try &.length_seconds
length_seconds ||= 0 length_seconds ||= 0
@ -235,16 +242,14 @@ def fetch_channel(ucid, pull_all_videos : Bool)
end end
if pull_all_videos if pull_all_videos
page += 1
ids = [] of String
loop do loop do
initial_data = get_channel_videos_response(ucid, page, auto_generated: auto_generated) # Keep fetching videos using the continuation token retrieved earlier
videos = extract_videos(initial_data, author, ucid) videos, continuation = IV::Channel::Tabs.get_videos(channel, continuation: continuation)
count = videos.size count = 0
videos = videos.map { |video| ChannelVideo.new({ videos.select(SearchVideo).each do |video|
count += 1
video = ChannelVideo.new({
id: video.id, id: video.id,
title: video.title, title: video.title,
published: video.published, published: video.published,
@ -255,10 +260,7 @@ def fetch_channel(ucid, pull_all_videos : Bool)
live_now: video.live_now, live_now: video.live_now,
premiere_timestamp: video.premiere_timestamp, premiere_timestamp: video.premiere_timestamp,
views: video.views, views: video.views,
}) } })
videos.each do |video|
ids << video.id
# We are notified of Red videos elsewhere (PubSub), which includes a correct published date, # We are notified of Red videos elsewhere (PubSub), which includes a correct published date,
# so since they don't provide a published date here we can safely ignore them. # so since they don't provide a published date here we can safely ignore them.
@ -269,17 +271,10 @@ def fetch_channel(ucid, pull_all_videos : Bool)
end end
break if count < 25 break if count < 25
page += 1 sleep 500.milliseconds
end end
end end
channel = InvidiousChannel.new({ channel.updated = Time.utc
id: ucid,
author: author,
updated: Time.utc,
deleted: false,
subscribed: nil,
})
return channel return channel
end end

View File

@ -24,7 +24,7 @@ def fetch_channel_playlists(ucid, author, continuation, sort_by)
initial_data = YoutubeAPI.browse(ucid, params: params || "") initial_data = YoutubeAPI.browse(ucid, params: params || "")
end end
return extract_items(initial_data, ucid, author) return extract_items(initial_data, author, ucid)
end end
# ## NOTE: DEPRECATED # ## NOTE: DEPRECATED

View File

@ -16,6 +16,14 @@ def produce_channel_videos_continuation(ucid, page = 1, auto_generated = nil, so
.try { |i| Base64.urlsafe_encode(i) } .try { |i| Base64.urlsafe_encode(i) }
.try { |i| URI.encode_www_form(i) } .try { |i| URI.encode_www_form(i) }
sort_by_numerical =
case sort_by
when "newest" then 1_i64
when "popular" then 2_i64
when "oldest" then 3_i64 # Broken as of 10/2022 :c
else 1_i64 # Fallback to "newest"
end
object_inner_1 = { object_inner_1 = {
"110:embedded" => { "110:embedded" => {
"3:embedded" => { "3:embedded" => {
@ -24,7 +32,7 @@ def produce_channel_videos_continuation(ucid, page = 1, auto_generated = nil, so
"1:string" => object_inner_2_encoded, "1:string" => object_inner_2_encoded,
"2:string" => "00000000-0000-0000-0000-000000000000", "2:string" => "00000000-0000-0000-0000-000000000000",
}, },
"3:varint" => 1_i64, "3:varint" => sort_by_numerical,
}, },
}, },
}, },
@ -52,34 +60,66 @@ def produce_channel_videos_continuation(ucid, page = 1, auto_generated = nil, so
return continuation return continuation
end end
# Requests one page of a channel's videos.
#
# A continuation token is generated for the given channel (`ucid`), `page`,
# `auto_generated` flag and `sort_by` order (always with `v2: true`), and that
# token is handed to `YoutubeAPI.browse`. Whatever `YoutubeAPI.browse` returns
# is returned unchanged.
def get_channel_videos_response(ucid, page = 1, auto_generated = nil, sort_by = "newest")
  return YoutubeAPI.browse(
    produce_channel_videos_continuation(
      ucid, page,
      auto_generated: auto_generated, sort_by: sort_by, v2: true
    )
  )
end
# Fetches a batch of videos for the channel `ucid` and returns a
# `{count, videos}` pair, where `count` is the size of `videos`.
#
# NOTE: `page` is accepted but is not used. The request below always asks
# for page 1; the former logic that issued two requests per page
# (`page * 2 + (i - 1)`) is disabled.
def get_60_videos(ucid, author, page, auto_generated, sort_by = "newest")
  response = get_channel_videos_response(ucid, 1, auto_generated: auto_generated, sort_by: sort_by)
  channel_videos = extract_videos(response, author, ucid)

  return channel_videos.size, channel_videos
end
# Returns the videos extracted from a channel's videos response, requested
# with `get_channel_videos_response`'s default arguments.
#
# The author name is looked up in the same response, under
# `metadata` -> `channelMetadataRenderer` -> `title`. It is `nil` when any
# of those keys is missing.
def get_latest_videos(ucid)
  response = get_channel_videos_response(ucid)

  # Walk down the metadata one level at a time, stopping at the first missing key
  metadata = response["metadata"]?
  renderer = metadata.try { |node| node["channelMetadataRenderer"]? }
  title = renderer.try { |node| node["title"]? }
  author = title.try { |node| node.as_s }

  return extract_videos(response, author, ucid)
end
# Used in bypass_captcha_job.cr # Used in bypass_captcha_job.cr
def produce_channel_videos_url(ucid, page = 1, auto_generated = nil, sort_by = "newest", v2 = false) def produce_channel_videos_url(ucid, page = 1, auto_generated = nil, sort_by = "newest", v2 = false)
continuation = produce_channel_videos_continuation(ucid, page, auto_generated, sort_by, v2) continuation = produce_channel_videos_continuation(ucid, page, auto_generated, sort_by, v2)
return "/browse_ajax?continuation=#{continuation}&gl=US&hl=en" return "/browse_ajax?continuation=#{continuation}&gl=US&hl=en"
end end
module Invidious::Channel::Tabs
  extend self

  # -------------------
  #  Regular videos
  # -------------------

  # Builds the continuation token used when no token was supplied by the
  # caller, for the channel `ucid` and the requested `sort_by` order.
  def make_initial_video_ctoken(ucid, sort_by) : String
    return produce_channel_videos_continuation(ucid, sort_by: sort_by)
  end

  # Convenience overload for AboutChannel. The author name and ucid are
  # unpacked here because some callers (e.g. RSS feeds) still need to call
  # get_videos with those two values directly.
  # TODO: figure out how to get rid of that
  def get_videos(channel : AboutChannel, *, continuation : String? = nil, sort_by = "newest")
    return get_videos(channel.author, channel.ucid, continuation: continuation, sort_by: sort_by)
  end

  # Convenience overload for InvidiousChannel. The author name and ucid are
  # unpacked here because some callers (e.g. RSS feeds) still need to call
  # get_videos with those two values directly.
  # TODO: figure out how to get rid of that
  def get_videos(channel : InvidiousChannel, *, continuation : String? = nil, sort_by = "newest")
    return get_videos(channel.author, channel.id, continuation: continuation, sort_by: sort_by)
  end

  # Sends one browse request and returns the result of `extract_items`
  # on the response.
  #
  # When `continuation` is nil, an initial token is generated from `ucid`
  # and `sort_by`. When a token is given, `sort_by` is not used.
  def get_videos(author : String, ucid : String, *, continuation : String? = nil, sort_by = "newest")
    ctoken = continuation || make_initial_video_ctoken(ucid, sort_by)
    response = YoutubeAPI.browse(continuation: ctoken)

    return extract_items(response, author, ucid)
  end

  # Loads up to two consecutive "pages" of videos and returns them as a
  # single list, together with the continuation token of the last request.
  def get_60_videos(channel : AboutChannel, *, continuation : String? = nil, sort_by = "newest")
    # First "page": starts from the given continuation token if there is
    # one, otherwise from the beginning (get_videos only looks at `sort_by`
    # in that second case).
    videos, next_continuation = get_videos(channel, continuation: continuation, sort_by: sort_by)

    # If there is more to load, fetch a second "page", append it, and
    # replace the previous continuation token.
    if second_page_token = next_continuation
      more_videos, next_continuation = get_videos(channel, continuation: second_page_token)
      videos.concat more_videos
    end

    return videos, next_continuation
  end
end

View File

@ -5,8 +5,6 @@ module Invidious::Routes::API::V1::Channels
env.response.content_type = "application/json" env.response.content_type = "application/json"
ucid = env.params.url["ucid"] ucid = env.params.url["ucid"]
sort_by = env.params.query["sort_by"]?.try &.downcase
sort_by ||= "newest"
begin begin
channel = get_about_info(ucid, locale) channel = get_about_info(ucid, locale)
@ -19,17 +17,14 @@ module Invidious::Routes::API::V1::Channels
return error_json(500, ex) return error_json(500, ex)
end end
page = 1 # Retrieve "sort by" setting from URL parameters
if channel.auto_generated sort_by = env.params.query["sort_by"]?.try &.downcase || "newest"
videos = [] of SearchVideo
count = 0
else
begin begin
count, videos = get_60_videos(channel.ucid, channel.author, page, channel.auto_generated, sort_by) videos, _ = Channel::Tabs.get_videos(channel, sort_by: sort_by)
rescue ex rescue ex
return error_json(500, ex) return error_json(500, ex)
end end
end
JSON.build do |json| JSON.build do |json|
# TODO: Refactor into `to_json` for InvidiousChannel # TODO: Refactor into `to_json` for InvidiousChannel
@ -134,25 +129,11 @@ module Invidious::Routes::API::V1::Channels
end end
def self.latest(env) def self.latest(env)
locale = env.get("preferences").as(Preferences).locale # Remove parameters that could affect this endpoint's behavior
env.params.query.delete("sort_by") if env.params.query.has_key?("sort_by")
env.params.query.delete("continuation") if env.params.query.has_key?("continuation")
env.response.content_type = "application/json" return self.videos(env)
ucid = env.params.url["ucid"]
begin
videos = get_latest_videos(ucid)
rescue ex
return error_json(500, ex)
end
JSON.build do |json|
json.array do
videos.each do |video|
video.to_json(locale, json)
end
end
end
end end
def self.videos(env) def self.videos(env)
@ -161,11 +142,6 @@ module Invidious::Routes::API::V1::Channels
env.response.content_type = "application/json" env.response.content_type = "application/json"
ucid = env.params.url["ucid"] ucid = env.params.url["ucid"]
page = env.params.query["page"]?.try &.to_i?
page ||= 1
sort_by = env.params.query["sort"]?.try &.downcase
sort_by ||= env.params.query["sort_by"]?.try &.downcase
sort_by ||= "newest"
begin begin
channel = get_about_info(ucid, locale) channel = get_about_info(ucid, locale)
@ -178,18 +154,28 @@ module Invidious::Routes::API::V1::Channels
return error_json(500, ex) return error_json(500, ex)
end end
# Retrieve some URL parameters
sort_by = env.params.query["sort_by"]?.try &.downcase || "newest"
continuation = env.params.query["continuation"]?
begin begin
count, videos = get_60_videos(channel.ucid, channel.author, page, channel.auto_generated, sort_by) videos, next_continuation = Channel::Tabs.get_60_videos(
channel, continuation: continuation, sort_by: sort_by
)
rescue ex rescue ex
return error_json(500, ex) return error_json(500, ex)
end end
JSON.build do |json| return JSON.build do |json|
json.object do
json.field "videos" do
json.array do json.array do
videos.each do |video| videos.each &.to_json(locale, json)
video.to_json(locale, json)
end end
end end
json.field "continuation", next_continuation if next_continuation
end
end end
end end

View File

@ -32,7 +32,9 @@ module Invidious::Routes::Channels
sort_options = {"newest", "oldest", "popular"} sort_options = {"newest", "oldest", "popular"}
sort_by ||= "newest" sort_by ||= "newest"
count, items = get_60_videos(channel.ucid, channel.author, 1, channel.auto_generated, sort_by) items, continuation = Channel::Tabs.get_60_videos(
channel, continuation: continuation, sort_by: sort_by
)
end end
templated "channel" templated "channel"