mirror of https://github.com/iv-org/invidious.git
extractors: Add continuation token parser
This commit is contained in:
parent
bdc51cd20f
commit
ce7db8d2cb
|
@ -4,7 +4,7 @@ Spectator.describe Invidious::Hashtag do
|
||||||
it "parses richItemRenderer containers (test 1)" do
|
it "parses richItemRenderer containers (test 1)" do
|
||||||
# Enable mock
|
# Enable mock
|
||||||
test_content = load_mock("hashtag/martingarrix_page1")
|
test_content = load_mock("hashtag/martingarrix_page1")
|
||||||
videos = extract_items(test_content)
|
videos, _ = extract_items(test_content)
|
||||||
|
|
||||||
expect(typeof(videos)).to eq(Array(SearchItem))
|
expect(typeof(videos)).to eq(Array(SearchItem))
|
||||||
expect(videos.size).to eq(60)
|
expect(videos.size).to eq(60)
|
||||||
|
@ -57,7 +57,7 @@ Spectator.describe Invidious::Hashtag do
|
||||||
it "parses richItemRenderer containers (test 2)" do
|
it "parses richItemRenderer containers (test 2)" do
|
||||||
# Enable mock
|
# Enable mock
|
||||||
test_content = load_mock("hashtag/martingarrix_page2")
|
test_content = load_mock("hashtag/martingarrix_page2")
|
||||||
videos = extract_items(test_content)
|
videos, _ = extract_items(test_content)
|
||||||
|
|
||||||
expect(typeof(videos)).to eq(Array(SearchItem))
|
expect(typeof(videos)).to eq(Array(SearchItem))
|
||||||
expect(videos.size).to eq(60)
|
expect(videos.size).to eq(60)
|
||||||
|
|
|
@ -1,18 +1,7 @@
|
||||||
def fetch_channel_playlists(ucid, author, continuation, sort_by)
|
def fetch_channel_playlists(ucid, author, continuation, sort_by)
|
||||||
if continuation
|
if continuation
|
||||||
response_json = YoutubeAPI.browse(continuation)
|
response_json = YoutubeAPI.browse(continuation)
|
||||||
continuation_items = response_json["onResponseReceivedActions"]?
|
items, continuation = extract_items(response_json, author, ucid)
|
||||||
.try &.[0]["appendContinuationItemsAction"]["continuationItems"]
|
|
||||||
|
|
||||||
return [] of SearchItem, nil if !continuation_items
|
|
||||||
|
|
||||||
items = [] of SearchItem
|
|
||||||
continuation_items.as_a.select(&.as_h.has_key?("gridPlaylistRenderer")).each { |item|
|
|
||||||
parse_item(item, author, ucid).try { |t| items << t }
|
|
||||||
}
|
|
||||||
|
|
||||||
continuation = continuation_items.as_a.last["continuationItemRenderer"]?
|
|
||||||
.try &.["continuationEndpoint"]["continuationCommand"]["token"].as_s
|
|
||||||
else
|
else
|
||||||
url = "/channel/#{ucid}/playlists?flow=list&view=1"
|
url = "/channel/#{ucid}/playlists?flow=list&view=1"
|
||||||
|
|
||||||
|
@ -30,8 +19,7 @@ def fetch_channel_playlists(ucid, author, continuation, sort_by)
|
||||||
initial_data = extract_initial_data(response.body)
|
initial_data = extract_initial_data(response.body)
|
||||||
return [] of SearchItem, nil if !initial_data
|
return [] of SearchItem, nil if !initial_data
|
||||||
|
|
||||||
items = extract_items(initial_data, author, ucid)
|
items, continuation = extract_items(initial_data, author, ucid)
|
||||||
continuation = response.body.match(/"token":"(?<continuation>[^"]+)"/).try &.["continuation"]?
|
|
||||||
end
|
end
|
||||||
|
|
||||||
return items, continuation
|
return items, continuation
|
||||||
|
|
|
@ -8,7 +8,8 @@ module Invidious::Hashtag
|
||||||
client_config = YoutubeAPI::ClientConfig.new(region: region)
|
client_config = YoutubeAPI::ClientConfig.new(region: region)
|
||||||
response = YoutubeAPI.browse(continuation: ctoken, client_config: client_config)
|
response = YoutubeAPI.browse(continuation: ctoken, client_config: client_config)
|
||||||
|
|
||||||
return extract_items(response)
|
items, _ = extract_items(response)
|
||||||
|
return items
|
||||||
end
|
end
|
||||||
|
|
||||||
def generate_continuation(hashtag : String, cursor : Int)
|
def generate_continuation(hashtag : String, cursor : Int)
|
||||||
|
|
|
@ -265,4 +265,11 @@ class Category
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
|
|
||||||
|
struct Continuation
|
||||||
|
getter token
|
||||||
|
|
||||||
|
def initialize(@token : String)
|
||||||
|
end
|
||||||
|
end
|
||||||
|
|
||||||
alias SearchItem = SearchVideo | SearchChannel | SearchPlaylist | Category
|
alias SearchItem = SearchVideo | SearchChannel | SearchPlaylist | Category
|
||||||
|
|
|
@ -9,7 +9,8 @@ module Invidious::Search
|
||||||
client_config = YoutubeAPI::ClientConfig.new(region: query.region)
|
client_config = YoutubeAPI::ClientConfig.new(region: query.region)
|
||||||
initial_data = YoutubeAPI.search(query.text, search_params, client_config: client_config)
|
initial_data = YoutubeAPI.search(query.text, search_params, client_config: client_config)
|
||||||
|
|
||||||
return extract_items(initial_data)
|
items, _ = extract_items(initial_data)
|
||||||
|
return items
|
||||||
end
|
end
|
||||||
|
|
||||||
# Search a youtube channel
|
# Search a youtube channel
|
||||||
|
@ -30,16 +31,7 @@ module Invidious::Search
|
||||||
continuation = produce_channel_search_continuation(ucid, query.text, query.page)
|
continuation = produce_channel_search_continuation(ucid, query.text, query.page)
|
||||||
response_json = YoutubeAPI.browse(continuation)
|
response_json = YoutubeAPI.browse(continuation)
|
||||||
|
|
||||||
continuation_items = response_json["onResponseReceivedActions"]?
|
items, _ = extract_items(response_json, "", ucid)
|
||||||
.try &.[0]["appendContinuationItemsAction"]["continuationItems"]
|
|
||||||
|
|
||||||
return [] of SearchItem if !continuation_items
|
|
||||||
|
|
||||||
items = [] of SearchItem
|
|
||||||
continuation_items.as_a.select(&.as_h.has_key?("itemSectionRenderer")).each do |item|
|
|
||||||
parse_item(item["itemSectionRenderer"]["contents"].as_a[0]).try { |t| items << t }
|
|
||||||
end
|
|
||||||
|
|
||||||
return items
|
return items
|
||||||
end
|
end
|
||||||
|
|
||||||
|
|
|
@ -7,7 +7,7 @@ require "../helpers/serialized_yt_data"
|
||||||
private ITEM_CONTAINER_EXTRACTOR = {
|
private ITEM_CONTAINER_EXTRACTOR = {
|
||||||
Extractors::YouTubeTabs,
|
Extractors::YouTubeTabs,
|
||||||
Extractors::SearchResults,
|
Extractors::SearchResults,
|
||||||
Extractors::Continuation,
|
Extractors::ContinuationContent,
|
||||||
}
|
}
|
||||||
|
|
||||||
private ITEM_PARSERS = {
|
private ITEM_PARSERS = {
|
||||||
|
@ -18,6 +18,7 @@ private ITEM_PARSERS = {
|
||||||
Parsers::CategoryRendererParser,
|
Parsers::CategoryRendererParser,
|
||||||
Parsers::RichItemRendererParser,
|
Parsers::RichItemRendererParser,
|
||||||
Parsers::ReelItemRendererParser,
|
Parsers::ReelItemRendererParser,
|
||||||
|
Parsers::ContinuationItemRendererParser,
|
||||||
}
|
}
|
||||||
|
|
||||||
private alias InitialData = Hash(String, JSON::Any)
|
private alias InitialData = Hash(String, JSON::Any)
|
||||||
|
@ -347,14 +348,9 @@ private module Parsers
|
||||||
content_container = item_contents["contents"]
|
content_container = item_contents["contents"]
|
||||||
end
|
end
|
||||||
|
|
||||||
raw_contents = content_container["items"]?.try &.as_a
|
content_container["items"]?.try &.as_a.each do |item|
|
||||||
if !raw_contents.nil?
|
result = parse_item(item, author_fallback.name, author_fallback.id)
|
||||||
raw_contents.each do |item|
|
contents << result if result.is_a?(SearchItem)
|
||||||
result = parse_item(item)
|
|
||||||
if !result.nil?
|
|
||||||
contents << result
|
|
||||||
end
|
|
||||||
end
|
|
||||||
end
|
end
|
||||||
|
|
||||||
Category.new({
|
Category.new({
|
||||||
|
@ -477,6 +473,35 @@ private module Parsers
|
||||||
return {{@type.name}}
|
return {{@type.name}}
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
|
|
||||||
|
# Parses an InnerTube continuationItemRenderer into a Continuation.
|
||||||
|
# Returns nil when the given object isn't a continuationItemRenderer.
|
||||||
|
#
|
||||||
|
# continuationItemRenderer contains various metadata used to load more
|
||||||
|
# content (i.e. when the user scrolls down). The interesting bit is the
|
||||||
|
# protobuf object known as the "continuation token". Previously, those
|
||||||
|
# were generated from scratch, but recent (as of 11/2022) YouTube changes
|
||||||
|
# are forcing us to extract them from replies.
|
||||||
|
#
|
||||||
|
module ContinuationItemRendererParser
|
||||||
|
def self.process(item : JSON::Any, author_fallback : AuthorFallback)
|
||||||
|
if item_contents = item["continuationItemRenderer"]?
|
||||||
|
return self.parse(item_contents)
|
||||||
|
end
|
||||||
|
end
|
||||||
|
|
||||||
|
private def self.parse(item_contents)
|
||||||
|
token = item_contents
|
||||||
|
.dig?("continuationEndpoint", "continuationCommand", "token")
|
||||||
|
.try &.as_s
|
||||||
|
|
||||||
|
return Continuation.new(token) if token
|
||||||
|
end
|
||||||
|
|
||||||
|
def self.parser_name
|
||||||
|
return {{@type.name}}
|
||||||
|
end
|
||||||
|
end
|
||||||
end
|
end
|
||||||
|
|
||||||
# The following are the extractors for extracting an array of items from
|
# The following are the extractors for extracting an array of items from
|
||||||
|
@ -746,13 +771,18 @@ def extract_items(
|
||||||
initial_data : InitialData,
|
initial_data : InitialData,
|
||||||
author_fallback : String? = nil,
|
author_fallback : String? = nil,
|
||||||
author_id_fallback : String? = nil
|
author_id_fallback : String? = nil
|
||||||
) : Array(SearchItem)
|
) : {Array(SearchItem), String?}
|
||||||
items = [] of SearchItem
|
items = [] of SearchItem
|
||||||
|
continuation = nil
|
||||||
|
|
||||||
extract_items(initial_data) do |item|
|
extract_items(initial_data) do |item|
|
||||||
parsed = parse_item(item, author_fallback, author_id_fallback)
|
parsed = parse_item(item, author_fallback, author_id_fallback)
|
||||||
items << parsed if !parsed.nil?
|
|
||||||
|
case parsed
|
||||||
|
when .is_a?(Continuation) then continuation = parsed.token
|
||||||
|
when .is_a?(SearchItem) then items << parsed
|
||||||
|
end
|
||||||
end
|
end
|
||||||
|
|
||||||
return items
|
return items, continuation
|
||||||
end
|
end
|
||||||
|
|
|
@ -68,10 +68,10 @@ rescue ex
|
||||||
return false
|
return false
|
||||||
end
|
end
|
||||||
|
|
||||||
def extract_videos(initial_data : Hash(String, JSON::Any), author_fallback : String? = nil, author_id_fallback : String? = nil)
|
def extract_videos(initial_data : Hash(String, JSON::Any), author_fallback : String? = nil, author_id_fallback : String? = nil) : Array(SearchVideo)
|
||||||
extracted = extract_items(initial_data, author_fallback, author_id_fallback)
|
extracted, _ = extract_items(initial_data, author_fallback, author_id_fallback)
|
||||||
|
|
||||||
target = [] of SearchItem
|
target = [] of (SearchItem | Continuation)
|
||||||
extracted.each do |i|
|
extracted.each do |i|
|
||||||
if i.is_a?(Category)
|
if i.is_a?(Category)
|
||||||
i.contents.each { |cate_i| target << cate_i if !cate_i.is_a? Video }
|
i.contents.each { |cate_i| target << cate_i if !cate_i.is_a? Video }
|
||||||
|
@ -79,28 +79,11 @@ def extract_videos(initial_data : Hash(String, JSON::Any), author_fallback : Str
|
||||||
target << i
|
target << i
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
return target.select(SearchVideo).map(&.as(SearchVideo))
|
|
||||||
|
return target.select(SearchVideo)
|
||||||
end
|
end
|
||||||
|
|
||||||
def extract_selected_tab(tabs)
|
def extract_selected_tab(tabs)
|
||||||
# Extract the selected tab from the array of tabs Youtube returns
|
# Extract the selected tab from the array of tabs Youtube returns
|
||||||
return selected_target = tabs.as_a.select(&.["tabRenderer"]?.try &.["selected"]?.try &.as_bool)[0]["tabRenderer"]
|
return selected_target = tabs.as_a.select(&.["tabRenderer"]?.try &.["selected"]?.try &.as_bool)[0]["tabRenderer"]
|
||||||
end
|
end
|
||||||
|
|
||||||
def fetch_continuation_token(items : Array(JSON::Any))
|
|
||||||
# Fetches the continuation token from an array of items
|
|
||||||
return items.last["continuationItemRenderer"]?
|
|
||||||
.try &.["continuationEndpoint"]["continuationCommand"]["token"].as_s
|
|
||||||
end
|
|
||||||
|
|
||||||
def fetch_continuation_token(initial_data : Hash(String, JSON::Any))
|
|
||||||
# Fetches the continuation token from initial data
|
|
||||||
if initial_data["onResponseReceivedActions"]?
|
|
||||||
continuation_items = initial_data["onResponseReceivedActions"][0]["appendContinuationItemsAction"]["continuationItems"]
|
|
||||||
else
|
|
||||||
tab = extract_selected_tab(initial_data["contents"]["twoColumnBrowseResultsRenderer"]["tabs"])
|
|
||||||
continuation_items = tab["content"]["sectionListRenderer"]["contents"][0]["itemSectionRenderer"]["contents"][0]["gridRenderer"]["items"]
|
|
||||||
end
|
|
||||||
|
|
||||||
return fetch_continuation_token(continuation_items.as_a)
|
|
||||||
end
|
|
||||||
|
|
Loading…
Reference in New Issue