Update SERVICE:ParseYTMetaDataFromHTML to check for broadcast end date

Move inner functions and tables outside of the function
Add HTML entity parsing for common symbols
Syntactic changes for consistency
This commit is contained in:
veitikka 2020-08-07 13:36:40 +03:00 committed by GitHub
parent b687ed60f1
commit 57a9077187
No known key found for this signature in database

View File

@ -52,59 +52,6 @@ local function convertISO8601Time( duration )
return duration
local function OnReceiveMetadata( self, callback, body )
local metadata = {}
-- Check for valid JSON response
local resp = util.JSONToTable( body )
if not resp then
return callback(false)
-- If 'error' key is present, the query failed.
if resp.error then
return callback(false, TableLookup(resp, 'error.message'))
-- We need at least one result
local results = TableLookup(resp, 'pageInfo.totalResults')
if not ( results and results > 0 ) then
return callback(false, "Requested video wasn't found")
local item = resp.items[1]
-- Video must be embeddable
if not TableLookup(item, 'status.embeddable') then
return callback( false, "Requested video was embed disabled" )
metadata.title = TableLookup(item, 'snippet.title')
-- Check for live broadcast
local liveBroadcast = TableLookup(item, 'snippet.liveBroadcastContent')
if liveBroadcast == 'none' then
-- Duration is an ISO 8601 string
local durationStr = TableLookup(item, 'contentDetails.duration')
metadata.duration = math.max(1, convertISO8601Time(durationStr))
metadata.duration = 0 -- mark as live video
-- 'medium' size thumbnail doesn't have letterboxing
metadata.thumbnail = TableLookup(item, 'snippet.thumbnails.medium.url')
self:SetMetadata(metadata, true)
if self:IsTimed() then
function SERVICE:GetMetadata( callback )
if self._metadata then
callback( self._metadata )
@ -139,12 +86,12 @@ function SERVICE:GetMetadata( callback )
local videoUrl = "https://www.youtube.com/watch?v="..videoId
self:Fetch( videoUrl,
--On Success
-- On Success
function( body, length, headers, code )
local metadata = self:ParseYTMetaDataFromHTML(body, videoId)
--html couldn't be parsed
if (!metadata.title || !metadata.duration) then
-- html couldn't be parsed
if not metadata.title or not isnumber(metadata.duration) then
callback(false, "Failed to parse HTML Page for metadata")
@ -161,7 +108,7 @@ function SERVICE:GetMetadata( callback )
function( code )
callback(false, "Failed to load YouTube ["..tostring(code).."]")
-- Headers
["User-Agent"] = "Googlebot"
@ -169,62 +116,93 @@ function SERVICE:GetMetadata( callback )
-- Get the value for an attribute from a html element.
-- @param element   (string|nil) text of a single HTML tag, typically the result of string.match
-- @param attribute (string) literal attribute name to look up, e.g. "content" or "href"
-- @return (string|nil) the text between the attribute's double quotes, or nil when
--         element is nil or the attribute is not present with a double-quoted value
local function ParseElementAttribute( element, attribute )
	if not element then return end

	-- The name is spliced into a Lua pattern, so escape punctuation first;
	-- otherwise a name such as "data-id" would be read as pattern syntax.
	local name = string.gsub( attribute, "%p", "%%%0" )

	-- Capture the quoted value in a single match. Stripping "attribute=" with a
	-- second gsub would also remove any identical text inside the value itself.
	return string.match( element, name.."%s-=%s-\"(.-)\"" )
end
-- Get the contents of a html element by removing its opening and closing tags.
-- Used as fallback for when title cannot be found.
-- Only attribute-less tags such as <title> are recognised by the tag patterns.
-- @param element (string|nil) text of one element, e.g. "<title>Some video</title>"
-- @return (string|nil) the inner text with surrounding whitespace removed,
--         or nil when element is nil
local function ParseElementContent( element )
	if not element then return end

	-- Trim start: opening tag plus any whitespace on either side of it.
	-- Greedy %s* is required here; a trailing lazy %s- always matches nothing.
	local output = string.gsub( element, "^%s*<%w->%s*", "" )

	-- Trim end: closing tag plus surrounding whitespace.
	-- The parentheses drop gsub's second result (the replacement count).
	return ( string.gsub( output, "%s*</%w->%s*$", "" ) )
end
-- List of HTML entities to find in title and convert to their corresponding symbols
local htmlEnts = {
	["&quot;"] = "\"",
	["&apos;"] = "'",
	["&#39;"] = "'",
	["&lt;"] = "<",
	["&gt;"] = ">",
	["&amp;"] = "&"
}

-- Turn HTML entities into symbols.
-- @param text (string|nil) title text that may contain HTML entities
-- @return (string|nil) text with every entity listed in htmlEnts replaced by its
--         symbol, or nil when text is nil
local function ParseTitleSymbols( text )
	if not text then return end

	-- Decode in a single pass: each "&name;" / "&#nn;" token is looked up in
	-- htmlEnts once, and tokens without an entry are left untouched. Replacing
	-- entity by entity in pairs() order could decode twice, e.g. turning
	-- "&amp;lt;" into "<" instead of "&lt;".
	-- The parentheses drop gsub's second result (the replacement count).
	return ( string.gsub( text, "&#?%w+;", htmlEnts ) )
end
-- Lua search patterns to find metadata from the html.
-- Every pattern matches one whole tag; %b"" covers the double-quoted attribute
-- value at the end of the tag. The *_fallback entries are alternative tags for
-- the same piece of metadata.
local patterns = {
	-- Video title
	title = "<meta%sproperty=\"og:title\"%s-content=%b\"\">",
	title_fallback = "<title>.-</title>",

	-- Thumbnail image
	thumb = "<meta%sproperty=\"og:image\"%s-content=%b\"\">",
	thumb_fallback = "<link%sitemprop=\"thumbnailUrl\"%s-href=%b\"\">",

	-- Duration and live broadcast markers
	duration = "<meta%sitemprop%s-=%s-\"duration\"%s-content%s-=%s-%b\"\">",
	live = "<meta%sitemprop%s-=%s-\"isLiveBroadcast\"%s-content%s-=%s-%b\"\">",
	live_enddate = "<meta%sitemprop%s-=%s-\"endDate\"%s-content%s-=%s-%b\"\">"
}
-- Function to parse video metadata straight from the html instead of using the API
function SERVICE:ParseYTMetaDataFromHTML( html, videoId )
-- Get the value for an attribute from a html element
local function ParseElementAttribute( element, attribute )
if !element then return end
-- Find the desired attribute
local output = string.match( element, attribute.."%s-=%s-%b\"\"" )
if !output then return end
-- Remove the 'attribute=' part
output = string.gsub( output, attribute.."%s-=%s-", "" )
-- Trim the quotes around the value string
return string.sub( output, 2, -2 )
-- Get the contents of a html element by removing tags
-- Used as fallback for when title cannot be found
local function ParseElementContent( element )
if !element then return end
-- Trim start
local output = string.gsub( element, "^%s-<%w->%s-", "" )
-- Trim end
return string.gsub( output, "%s-</%w->%s-$", "" )
function SERVICE:ParseYTMetaDataFromHTML( html, videoId )
--MetaData table to return when we're done
local metadata = {}
-- Lua search patterns to find metadata from the html
local patterns = {
["title"] = "<meta%sproperty=\"og:title\"%s-content=%b\"\">",
["title_fallback"] = "<title>.-</title>",
["thumb"] = "<meta%sproperty=\"og:image\"%s-content=%b\"\">",
["thumb_fallback"] = "<link%sitemprop=\"thumbnailUrl\"%s-href=%b\"\">",
["duration"] = "<meta%sitemprop%s-=%s-\"duration\"%s-content%s-=%s-%b\"\">",
["live"] = "<meta%sitemprop%s-=%s-\"isLiveBroadcast\"%s-content%s-=%s-%b\"\">"
-- Fetch title and thumbnail, with fallbacks if needed
metadata.title = ParseElementAttribute(string.match(html, patterns["title"]), "content")
or ParseElementContent(string.match(body, patterns["title_fallback"]))
or ParseElementContent(string.match(html, patterns["title_fallback"]))
-- Parse HTML entities in the title into symbols
metadata.title = ParseTitleSymbols(metadata.title)
metadata.thumbnail = ParseElementAttribute(string.match(html, patterns["thumb"]), "content")
or ParseElementAttribute(string.match(body, patterns["thumb_fallback"]), "href")
or ParseElementAttribute(string.match(html, patterns["thumb_fallback"]), "href")
-- See if the video is a live broadcast
-- Set duration to 0 if it is, otherwise use the actual duration
local isLiveBroadcast = tobool(ParseElementAttribute(string.match(html, patterns["live"]), "content"))
if (isLiveBroadcast) then
-- See if the video is an ongoing live broadcast
-- Set duration to 0 if it is, otherwise use the actual duration
local isLiveBroadcast = tobool(ParseElementAttribute(string.match(html, patterns["live"]), "content"))
local broadcastEndDate = string.match(html, patterns["live_enddate"])
if isLiveBroadcast and not broadcastEndDate then
-- Mark as live video
metadata.duration = 0
local durationISO8601 = ParseElementAttribute(string.match(html, patterns["duration"]), "content")
metadata.duration = math.max(1, convertISO8601Time(durationISO8601))
metadata.duration = 0
local durationISO8601 = ParseElementAttribute(string.match(html, patterns["duration"]), "content")
if isstring(durationISO8601) then
metadata.duration = math.max(1, convertISO8601Time(durationISO8601))
return metadata