Skip to content
Snippets Groups Projects
Verified Commit d36c2ec8 authored by Janne Mareike Koschinski
Browse files

Improved parsing

parent b197e090
No related branches found
No related tags found
No related merge requests found
...@@ -38,74 +38,95 @@ type OEmbed struct { ...@@ -38,74 +38,95 @@ type OEmbed struct {
} }
type InternalData struct { type InternalData struct {
url string url *url.URL
oembed_url string oembedUrl string
twitter_card string twitterCard string
twitter_site string twitterSite string
twitter_site_id string twitterSiteId string
twitter_creator string twitterCreator string
twitter_creator_id string twitterCreatorId string
twitter_description string twitterDescription string
twitter_title string twitterTitle string
twitter_image string twitterImage string
twitter_image_alt string twitterImageAlt string
twitter_player string twitterPlayer string
twitter_player_width string twitterPlayerWidth string
twitter_player_height string twitterPlayerHeight string
twitter_player_stream string twitterPlayerStream string
twitter_app_name_iphone string twitterAppNameIphone string
twitter_app_id_iphone string twitterAppIdIphone string
twitter_app_url_iphone string twitterAppUrlIphone string
twitter_app_name_ipad string twitterAppNameIpad string
twitter_app_id_ipad string twitterAppIdIpad string
twitter_app_url_ipad string twitterAppUrlIpad string
twitter_app_name_googleplay string twitterAppNameGoogleplay string
twitter_app_id_googleplay string twitterAppIdGoogleplay string
twitter_app_url_googleplay string twitterAppUrlGoogleplay string
twitter_label1 string twitterLabel1 string
twitter_data1 string twitterData1 string
twitter_label2 string twitterLabel2 string
twitter_data2 string twitterData2 string
og_url string ogUrl string
og_title string ogTitle string
og_description string ogDescription string
og_type string ogType string
og_locale string ogLocale string
og_video string ogVideo string
og_video_url string ogVideoUrl string
og_video_secure_url string ogVideoSecureUrl string
og_video_type string ogVideoType string
og_video_width string ogVideoWidth string
og_video_height string ogVideoHeight string
og_image string ogImage string
og_image_url string ogImageUrl string
og_image_secure_url string ogImageSecureUrl string
og_image_type string ogImageType string
og_image_width string ogImageWidth string
og_image_height string ogImageHeight string
og_audio string ogAudio string
og_audio_url string ogAudioUrl string
og_audio_secure_url string ogAudioSecureUrl string
og_audio_type string ogAudioType string
og_site_name string ogSiteName string
article_author string articleAuthor string
article_published_time string articlePublisher string
articlePublishedTime string
meta_title string
meta_description string soundcloudUser string
meta_author string soundcloudPlayCount string
meta_theme_color string soundcloudDownloadCount string
link_favicon string soundcloudCommentsCount string
link_author string soundcloudLikeCount string
soundcloudSoundCount string
soundcloudFollowerCount string
metaTitle string
metaDescription string
metaAuthor string
metaThemeColor string
linkFavicon string
linkAuthor string
title string title string
urlProviderIconIco string
urlProviderIconPng string
urlProviderOgSiteName string
urlProviderMetaThemeColor string
urlProviderMetaTitle string
urlProviderLinkFavicon string
urlProviderTitle string
rawType string
} }
type DataField struct { type DataField struct {
...@@ -124,8 +145,8 @@ type Data struct { ...@@ -124,8 +145,8 @@ type Data struct {
Text string `json:"text"` Text string `json:"text"`
Fields []DataField `json:"fields"` Fields []DataField `json:"fields"`
ImageUrl string `json:"image_url"` ImageUrl string `json:"image_url"`
LargeImage bool `json:"large_image"` Type string `json:"type"`
Types []string `json:"types"` Player string `json:"player"`
ThumbUrl string `json:"thumb_url"` ThumbUrl string `json:"thumb_url"`
Footer string `json:"footer"` Footer string `json:"footer"`
FooterIcon string `json:"footer_icon"` FooterIcon string `json:"footer_icon"`
...@@ -142,9 +163,31 @@ func coalesce(list []string) string { ...@@ -142,9 +163,31 @@ func coalesce(list []string) string {
return "" return ""
} }
func buildData(in InternalData, oEmbed OEmbed) (out Data) { func coalesceFilter(condition func(string) bool, list []string) string {
base, _ := url.ParseRequestURI(in.url) for _, str := range list {
resolve := func(path string) string { str = strings.TrimSpace(str)
if str != "" && condition(str) {
return str
}
}
return ""
}
// isUrl reports whether value looks like a URL reference rather than plain
// text. A value qualifies when it contains a scheme separator ("://")
// anywhere in the string, or when it begins with "/" (a root-relative path,
// which also matches protocol-relative "//host/..." forms).
//
// This is a purely textual heuristic: the value is neither parsed nor
// validated, and scheme-only forms without "://" (such as "mailto:") are not
// recognised.
func isUrl(value string) bool {
	return strings.Contains(value, "://") || strings.HasPrefix(value, "/")
}
// isNotUrl is the logical complement of isUrl: it reports true exactly when
// isUrl(value) is false. It exists so the negated check can be passed around
// as a func(string) bool predicate.
func isNotUrl(value string) bool {
	looksLikeURL := isUrl(value)
	return !looksLikeURL
}
func resolve(base *url.URL, path string) string {
if path == "" || base == nil { if path == "" || base == nil {
return "" return ""
} }
...@@ -156,55 +199,93 @@ func buildData(in InternalData, oEmbed OEmbed) (out Data) { ...@@ -156,55 +199,93 @@ func buildData(in InternalData, oEmbed OEmbed) (out Data) {
return absolute.String() return absolute.String()
} }
func buildData(in InternalData, oEmbed OEmbed, providerFallbacks []func(*InternalData)) (out Data) {
out = Data{} out = Data{}
out.TitleLink = in.url out.TitleLink = in.url.String()
out.Title = coalesce([]string{in.twitter_title, in.og_title, in.meta_title, in.title}) out.Title = coalesce([]string{in.twitterTitle, in.ogTitle, in.metaTitle, in.title})
out.Text = coalesce([]string{in.twitter_description, in.og_description, in.meta_description}) out.Text = coalesce([]string{in.twitterDescription, in.ogDescription, in.metaDescription})
out.AuthorName = coalesce([]string{oEmbed.AuthorName, in.meta_author}) out.AuthorName = coalesceFilter(isNotUrl, []string{oEmbed.AuthorName, in.articleAuthor, in.articlePublisher, in.metaAuthor})
out.AuthorLink = resolve(coalesce([]string{oEmbed.AuthorUrl, in.article_author, in.link_author})) out.AuthorLink = resolve(in.url, coalesceFilter(isUrl, []string{oEmbed.AuthorUrl, in.articleAuthor, in.articlePublisher, in.soundcloudUser, in.linkAuthor}))
out.Color = coalesce([]string{in.meta_theme_color})
out.Footer = coalesce([]string{oEmbed.ProviderName, in.og_site_name}) for _, fallback := range providerFallbacks {
out.FooterIcon = resolve(coalesce([]string{in.link_favicon})) out.Color = coalesce([]string{in.metaThemeColor, in.urlProviderMetaThemeColor})
out.Footer = coalesce([]string{oEmbed.ProviderName, in.ogSiteName, in.urlProviderOgSiteName, in.urlProviderMetaTitle, in.urlProviderTitle})
out.FooterIcon = resolve(in.url, coalesce([]string{in.linkFavicon, in.urlProviderLinkFavicon, in.urlProviderIconPng, in.urlProviderIconIco}))
if out.Footer != "" && out.FooterIcon != "" {
break
}
fallback(&in)
}
var largeImages []string var largeImages []string
var smallImages []string var smallImages []string
if in.twitter_card == "summary" || in.twitter_card == "summary_large_image" { if in.twitterCard == "summary" || in.twitterCard == "summary_large_image" {
largeImages = append(largeImages, in.twitter_image) largeImages = append(largeImages, in.twitterImage)
} else { } else {
smallImages = append(smallImages, in.twitter_image) smallImages = append(smallImages, in.twitterImage)
} }
largeImages = append(largeImages, in.og_image) largeImages = append(largeImages, in.ogImage)
smallImages = append(smallImages, in.link_favicon) smallImages = append(smallImages, in.ogImage)
if in.rawType == "image" {
largeTypes := []string{ largeImages = append(largeImages, in.url.String())
"video", }
"video.other", // Only use favicon as thumbnail icon if we don’t already use it for the footer
"article", if out.Footer == "" {
"summary_large_image", smallImages = append(smallImages, in.linkFavicon)
"player",
} }
isLargeImage := func (format string) bool { hasType := func(formats []string, types []string) bool {
for _, t := range largeTypes { for _, format := range formats {
for _, t := range types {
if t == format { if t == format {
return true return true
} }
} }
}
return false return false
} }
out.LargeImage = isLargeImage(oEmbed.Type) || isLargeImage(in.og_type) || isLargeImage(in.twitter_card) if in.rawType == "video" || in.rawType == "audio" {
out.Player = in.url.String()
out.Type = "player"
} else if in.rawType == "image" {
out.Type = "image"
} else {
if hasType([]string{oEmbed.Type, in.twitterCard, in.ogType}, []string{"article", "summary_large_image", "image"}) {
out.Type = "image"
} else if hasType([]string{oEmbed.Type, in.twitterCard, in.ogType}, []string{"player", "video", "video.other", "music", "music.song", "audio"}) {
out.Type = "player"
}
if out.Type == "player" {
out.Player = coalesce([]string{in.twitterPlayer, in.ogVideoSecureUrl, in.ogVideoUrl, in.ogAudioSecureUrl, in.ogAudioUrl})
}
}
if out.Type == "" {
out.Type = "article"
}
out.ThumbUrl = resolve(in.url, coalesce(smallImages))
out.ImageUrl = resolve(in.url, coalesce(largeImages))
types := []string{} buildField := func(title string, value string) {
for _, format := range []string{oEmbed.Type, in.og_type, in.twitter_card} { const MaxShortFieldLength = 32
if format != "" { value = strings.TrimSpace(value)
types = append(types, format) if value != "" && value != "0" {
out.Fields = append(out.Fields, DataField{
Title: title,
Value: value,
Short: len(value) < MaxShortFieldLength && len(title) < MaxShortFieldLength,
})
} }
} }
out.Types = types
out.ThumbUrl = resolve(coalesce(smallImages)) buildField("Play Count", in.soundcloudPlayCount)
out.ImageUrl = resolve(coalesce(largeImages)) buildField("Download Count", in.soundcloudDownloadCount)
buildField("Comments Count", in.soundcloudCommentsCount)
buildField("Like Count", in.soundcloudLikeCount)
buildField("Sound Count", in.soundcloudSoundCount)
buildField("Follower Count", in.soundcloudFollowerCount)
return return
} }
...@@ -214,247 +295,290 @@ func main() { ...@@ -214,247 +295,290 @@ func main() {
matchers := map[string]func(*InternalData, string, string){} matchers := map[string]func(*InternalData, string, string){}
matchers["meta/twitter:card"] = func(internalData *InternalData, content string, extra string) { matchers["meta/twitter:card"] = func(internalData *InternalData, content string, extra string) {
internalData.twitter_card = content internalData.twitterCard = content
} }
matchers["meta/twitter:site"] = func(internalData *InternalData, content string, extra string) { matchers["meta/twitter:site"] = func(internalData *InternalData, content string, extra string) {
internalData.twitter_site = content internalData.twitterSite = content
} }
matchers["meta/twitter:site:id"] = func(internalData *InternalData, content string, extra string) { matchers["meta/twitter:site:id"] = func(internalData *InternalData, content string, extra string) {
internalData.twitter_site_id = content internalData.twitterSiteId = content
} }
matchers["meta/twitter:creator"] = func(internalData *InternalData, content string, extra string) { matchers["meta/twitter:creator"] = func(internalData *InternalData, content string, extra string) {
internalData.twitter_creator = content internalData.twitterCreator = content
} }
matchers["meta/twitter:creator:id"] = func(internalData *InternalData, content string, extra string) { matchers["meta/twitter:creator:id"] = func(internalData *InternalData, content string, extra string) {
internalData.twitter_creator_id = content internalData.twitterCreatorId = content
} }
matchers["meta/twitter:description"] = func(internalData *InternalData, content string, extra string) { matchers["meta/twitter:description"] = func(internalData *InternalData, content string, extra string) {
internalData.twitter_description = content internalData.twitterDescription = content
} }
matchers["meta/twitter:title"] = func(internalData *InternalData, content string, extra string) { matchers["meta/twitter:title"] = func(internalData *InternalData, content string, extra string) {
internalData.twitter_title = content internalData.twitterTitle = content
} }
matchers["meta/twitter:image"] = func(internalData *InternalData, content string, extra string) { matchers["meta/twitter:image"] = func(internalData *InternalData, content string, extra string) {
internalData.twitter_image = content internalData.twitterImage = content
} }
matchers["meta/twitter:image:src"] = func(internalData *InternalData, content string, extra string) { matchers["meta/twitter:image:src"] = func(internalData *InternalData, content string, extra string) {
internalData.twitter_image = content internalData.twitterImage = content
} }
matchers["meta/twitter:image:alt"] = func(internalData *InternalData, content string, extra string) { matchers["meta/twitter:image:alt"] = func(internalData *InternalData, content string, extra string) {
internalData.twitter_image_alt = content internalData.twitterImageAlt = content
} }
matchers["meta/twitter:player"] = func(internalData *InternalData, content string, extra string) { matchers["meta/twitter:player"] = func(internalData *InternalData, content string, extra string) {
internalData.twitter_player = content internalData.twitterPlayer = content
} }
matchers["meta/twitter:player:width"] = func(internalData *InternalData, content string, extra string) { matchers["meta/twitter:player:width"] = func(internalData *InternalData, content string, extra string) {
internalData.twitter_player_width = content internalData.twitterPlayerWidth = content
} }
matchers["meta/twitter:player:height"] = func(internalData *InternalData, content string, extra string) { matchers["meta/twitter:player:height"] = func(internalData *InternalData, content string, extra string) {
internalData.twitter_player_height = content internalData.twitterPlayerHeight = content
} }
matchers["meta/twitter:player:stream"] = func(internalData *InternalData, content string, extra string) { matchers["meta/twitter:player:stream"] = func(internalData *InternalData, content string, extra string) {
internalData.twitter_player_stream = content internalData.twitterPlayerStream = content
} }
matchers["meta/twitter:app:name:iphone"] = func(internalData *InternalData, content string, extra string) { matchers["meta/twitter:app:name:iphone"] = func(internalData *InternalData, content string, extra string) {
internalData.twitter_app_name_iphone = content internalData.twitterAppNameIphone = content
} }
matchers["meta/twitter:app:id:iphone"] = func(internalData *InternalData, content string, extra string) { matchers["meta/twitter:app:id:iphone"] = func(internalData *InternalData, content string, extra string) {
internalData.twitter_app_id_iphone = content internalData.twitterAppIdIphone = content
} }
matchers["meta/twitter:app:url:iphone"] = func(internalData *InternalData, content string, extra string) { matchers["meta/twitter:app:url:iphone"] = func(internalData *InternalData, content string, extra string) {
internalData.twitter_app_url_iphone = content internalData.twitterAppUrlIphone = content
} }
matchers["meta/twitter:app:name:ipad"] = func(internalData *InternalData, content string, extra string) { matchers["meta/twitter:app:name:ipad"] = func(internalData *InternalData, content string, extra string) {
internalData.twitter_app_name_ipad = content internalData.twitterAppNameIpad = content
} }
matchers["meta/twitter:app:id:ipad"] = func(internalData *InternalData, content string, extra string) { matchers["meta/twitter:app:id:ipad"] = func(internalData *InternalData, content string, extra string) {
internalData.twitter_app_id_ipad = content internalData.twitterAppIdIpad = content
} }
matchers["meta/twitter:app:url:ipad"] = func(internalData *InternalData, content string, extra string) { matchers["meta/twitter:app:url:ipad"] = func(internalData *InternalData, content string, extra string) {
internalData.twitter_app_url_ipad = content internalData.twitterAppUrlIpad = content
} }
matchers["meta/twitter:app:name:googleplay"] = func(internalData *InternalData, content string, extra string) { matchers["meta/twitter:app:name:googleplay"] = func(internalData *InternalData, content string, extra string) {
internalData.twitter_app_name_googleplay = content internalData.twitterAppNameGoogleplay = content
} }
matchers["meta/twitter:app:id:googleplay"] = func(internalData *InternalData, content string, extra string) { matchers["meta/twitter:app:id:googleplay"] = func(internalData *InternalData, content string, extra string) {
internalData.twitter_app_id_googleplay = content internalData.twitterAppIdGoogleplay = content
} }
matchers["meta/twitter:app:url:googleplay"] = func(internalData *InternalData, content string, extra string) { matchers["meta/twitter:app:url:googleplay"] = func(internalData *InternalData, content string, extra string) {
internalData.twitter_app_url_googleplay = content internalData.twitterAppUrlGoogleplay = content
} }
matchers["meta/twitter:label1"] = func(internalData *InternalData, content string, extra string) { matchers["meta/twitter:label1"] = func(internalData *InternalData, content string, extra string) {
internalData.twitter_label1 = content internalData.twitterLabel1 = content
} }
matchers["meta/twitter:data1"] = func(internalData *InternalData, content string, extra string) { matchers["meta/twitter:data1"] = func(internalData *InternalData, content string, extra string) {
internalData.twitter_data1 = content internalData.twitterData1 = content
} }
matchers["meta/twitter:label2"] = func(internalData *InternalData, content string, extra string) { matchers["meta/twitter:label2"] = func(internalData *InternalData, content string, extra string) {
internalData.twitter_label2 = content internalData.twitterLabel2 = content
} }
matchers["meta/twitter:data2"] = func(internalData *InternalData, content string, extra string) { matchers["meta/twitter:data2"] = func(internalData *InternalData, content string, extra string) {
internalData.twitter_data2 = content internalData.twitterData2 = content
} }
matchers["meta/og:url"] = func(internalData *InternalData, content string, extra string) { matchers["meta/og:url"] = func(internalData *InternalData, content string, extra string) {
internalData.og_url = content internalData.ogUrl = content
} }
matchers["meta/og:title"] = func(internalData *InternalData, content string, extra string) { matchers["meta/og:title"] = func(internalData *InternalData, content string, extra string) {
internalData.og_title = content internalData.ogTitle = content
} }
matchers["meta/og:description"] = func(internalData *InternalData, content string, extra string) { matchers["meta/og:description"] = func(internalData *InternalData, content string, extra string) {
internalData.og_description = content internalData.ogDescription = content
} }
matchers["meta/og:type"] = func(internalData *InternalData, content string, extra string) { matchers["meta/og:type"] = func(internalData *InternalData, content string, extra string) {
internalData.og_type = content internalData.ogType = content
} }
matchers["meta/og:locale"] = func(internalData *InternalData, content string, extra string) { matchers["meta/og:locale"] = func(internalData *InternalData, content string, extra string) {
internalData.og_locale = content internalData.ogLocale = content
} }
matchers["meta/og:video"] = func(internalData *InternalData, content string, extra string) { matchers["meta/og:video"] = func(internalData *InternalData, content string, extra string) {
internalData.og_video = content internalData.ogVideo = content
} }
matchers["meta/og:video:url"] = func(internalData *InternalData, content string, extra string) { matchers["meta/og:video:url"] = func(internalData *InternalData, content string, extra string) {
internalData.og_video_url = content internalData.ogVideoUrl = content
} }
matchers["meta/og:video:secure_url"] = func(internalData *InternalData, content string, extra string) { matchers["meta/og:video:secure_url"] = func(internalData *InternalData, content string, extra string) {
internalData.og_video_secure_url = content internalData.ogVideoSecureUrl = content
} }
matchers["meta/og:video:type"] = func(internalData *InternalData, content string, extra string) { matchers["meta/og:video:type"] = func(internalData *InternalData, content string, extra string) {
internalData.og_video_type = content internalData.ogVideoType = content
} }
matchers["meta/og:video:width"] = func(internalData *InternalData, content string, extra string) { matchers["meta/og:video:width"] = func(internalData *InternalData, content string, extra string) {
internalData.og_video_width = content internalData.ogVideoWidth = content
} }
matchers["meta/og:video:height"] = func(internalData *InternalData, content string, extra string) { matchers["meta/og:video:height"] = func(internalData *InternalData, content string, extra string) {
internalData.og_video_height = content internalData.ogVideoHeight = content
} }
matchers["meta/og:image"] = func(internalData *InternalData, content string, extra string) { matchers["meta/og:image"] = func(internalData *InternalData, content string, extra string) {
internalData.og_image = content internalData.ogImage = content
} }
matchers["meta/og:image:url"] = func(internalData *InternalData, content string, extra string) { matchers["meta/og:image:url"] = func(internalData *InternalData, content string, extra string) {
internalData.og_image_url = content internalData.ogImageUrl = content
} }
matchers["meta/og:image:secure_url"] = func(internalData *InternalData, content string, extra string) { matchers["meta/og:image:secure_url"] = func(internalData *InternalData, content string, extra string) {
internalData.og_image_secure_url = content internalData.ogImageSecureUrl = content
} }
matchers["meta/og:image:type"] = func(internalData *InternalData, content string, extra string) { matchers["meta/og:image:type"] = func(internalData *InternalData, content string, extra string) {
internalData.og_image_type = content internalData.ogImageType = content
} }
matchers["meta/og:image:width"] = func(internalData *InternalData, content string, extra string) { matchers["meta/og:image:width"] = func(internalData *InternalData, content string, extra string) {
internalData.og_image_width = content internalData.ogImageWidth = content
} }
matchers["meta/og:image:height"] = func(internalData *InternalData, content string, extra string) { matchers["meta/og:image:height"] = func(internalData *InternalData, content string, extra string) {
internalData.og_image_height = content internalData.ogImageHeight = content
} }
matchers["meta/og:audio"] = func(internalData *InternalData, content string, extra string) { matchers["meta/og:audio"] = func(internalData *InternalData, content string, extra string) {
internalData.og_audio = content internalData.ogAudio = content
} }
matchers["meta/og:audio:url"] = func(internalData *InternalData, content string, extra string) { matchers["meta/og:audio:url"] = func(internalData *InternalData, content string, extra string) {
internalData.og_audio_url = content internalData.ogAudioUrl = content
} }
matchers["meta/og:audio:secure_url"] = func(internalData *InternalData, content string, extra string) { matchers["meta/og:audio:secure_url"] = func(internalData *InternalData, content string, extra string) {
internalData.og_audio_secure_url = content internalData.ogAudioSecureUrl = content
} }
matchers["meta/og:audio:type"] = func(internalData *InternalData, content string, extra string) { matchers["meta/og:audio:type"] = func(internalData *InternalData, content string, extra string) {
internalData.og_audio_type = content internalData.ogAudioType = content
} }
matchers["meta/og:site_name"] = func(internalData *InternalData, content string, extra string) { matchers["meta/og:site_name"] = func(internalData *InternalData, content string, extra string) {
internalData.og_site_name = content internalData.ogSiteName = content
} }
matchers["meta/article:author"] = func(internalData *InternalData, content string, extra string) { matchers["meta/article:author"] = func(internalData *InternalData, content string, extra string) {
internalData.article_author = content internalData.articleAuthor = content
}
matchers["meta/article:publisher"] = func(internalData *InternalData, content string, extra string) {
internalData.articlePublisher = content
} }
matchers["meta/article:published_time"] = func(internalData *InternalData, content string, extra string) { matchers["meta/article:published_time"] = func(internalData *InternalData, content string, extra string) {
internalData.article_published_time = content internalData.articlePublishedTime = content
}
matchers["meta/soundcloud:user"] = func(internalData *InternalData, content string, extra string) {
internalData.soundcloudUser = content
}
matchers["meta/soundcloud:play_count"] = func(internalData *InternalData, content string, extra string) {
internalData.soundcloudPlayCount = content
}
matchers["meta/soundcloud:download_count"] = func(internalData *InternalData, content string, extra string) {
internalData.soundcloudDownloadCount = content
}
matchers["meta/soundcloud:comments_count"] = func(internalData *InternalData, content string, extra string) {
internalData.soundcloudCommentsCount = content
}
matchers["meta/soundcloud:like_count"] = func(internalData *InternalData, content string, extra string) {
internalData.soundcloudLikeCount = content
}
matchers["meta/soundcloud:sound_count"] = func(internalData *InternalData, content string, extra string) {
internalData.soundcloudSoundCount = content
}
matchers["meta/soundcloud:follower_count"] = func(internalData *InternalData, content string, extra string) {
internalData.soundcloudFollowerCount = content
} }
matchers["meta/title"] = func(internalData *InternalData, content string, extra string) { matchers["meta/title"] = func(internalData *InternalData, content string, extra string) {
internalData.meta_title = content internalData.metaTitle = content
} }
matchers["meta/description"] = func(internalData *InternalData, content string, extra string) { matchers["meta/description"] = func(internalData *InternalData, content string, extra string) {
internalData.meta_description = content internalData.metaDescription = content
} }
matchers["meta/author"] = func(internalData *InternalData, content string, extra string) { matchers["meta/author"] = func(internalData *InternalData, content string, extra string) {
internalData.meta_author = content internalData.metaAuthor = content
} }
matchers["meta/theme-color"] = func(internalData *InternalData, content string, extra string) { matchers["meta/theme-color"] = func(internalData *InternalData, content string, extra string) {
internalData.meta_theme_color = content internalData.metaThemeColor = content
} }
matchers["link/icon"] = func(internalData *InternalData, content string, extra string) { matchers["link/icon"] = func(internalData *InternalData, content string, extra string) {
internalData.link_favicon = content internalData.linkFavicon = content
} }
matchers["link/author"] = func(internalData *InternalData, content string, extra string) { matchers["link/author"] = func(internalData *InternalData, content string, extra string) {
internalData.link_author = content internalData.linkAuthor = content
} }
matchers["link/alternate"] = func(internalData *InternalData, content string, extra string) { matchers["link/alternate"] = func(internalData *InternalData, content string, extra string) {
if extra == "application/json+oembed" { if extra == "application/json+oembed" {
internalData.oembed_url = content internalData.oembedUrl = content
} }
} }
matchers["title"] = func(internalData *InternalData, content string, extra string) { matchers["title"] = func(internalData *InternalData, content string, extra string) {
internalData.title = content internalData.title = content
} }
loadData := func(url string) (Data, error) { providerMatchers := map[string]func(*InternalData, string, string){}
fmt.Printf("Searching for %s\n", url)
providerMatchers["meta/og:site_name"] = func(internalData *InternalData, content string, extra string) {
internalData.urlProviderOgSiteName = content
}
providerMatchers["meta/title"] = func(internalData *InternalData, content string, extra string) {
internalData.urlProviderMetaTitle = content
}
providerMatchers["meta/theme-color"] = func(internalData *InternalData, content string, extra string) {
internalData.urlProviderMetaThemeColor = content
}
providerMatchers["link/icon"] = func(internalData *InternalData, content string, extra string) {
internalData.urlProviderLinkFavicon = content
}
providerMatchers["title"] = func(internalData *InternalData, content string, extra string) {
internalData.urlProviderTitle = content
}
loadData := func(requestedUrl string) (Data, error) {
fmt.Printf("Searching for %s\n", requestedUrl)
var data Data var data Data
resp, err := client.Get(url) resp, err := client.Get(requestedUrl)
if err != nil { if err != nil {
return data, err return data, err
} }
internalData := InternalData{ base, _ := url.ParseRequestURI(requestedUrl)
url: url,
}
var parseNode func(*html.Node) var parseNode func(map[string]func(*InternalData, string, string), *InternalData, *html.Node)
parseNode = func(n *html.Node) { parseNode = func(matchers map[string]func(*InternalData, string, string), internalData *InternalData, n *html.Node) {
if n.Type == html.ElementNode && n.Data == "meta" { if n.Type == html.ElementNode {
attrs := map[string]string{} attrs := map[string]string{}
for _, attr := range n.Attr { for _, attr := range n.Attr {
attrs[attr.Key] = attr.Val attrs[attr.Key] = attr.Val
} }
if n.Data == "meta" {
name := coalesce([]string{attrs["name"], attrs["property"]}) name := coalesce([]string{attrs["name"], attrs["property"]})
if name != "" { if name != "" {
matcher := matchers["meta/"+name] matcher := matchers["meta/"+name]
if matcher != nil { if matcher != nil {
matcher(&internalData, attrs["content"], "") matcher(internalData, attrs["content"], "")
} }
} }
} }
if n.Type == html.ElementNode && n.Data == "link" { if n.Data == "link" {
attrs := map[string]string{}
for _, attr := range n.Attr {
attrs[attr.Key] = attr.Val
}
names := strings.Split(attrs["rel"], " ") names := strings.Split(attrs["rel"], " ")
for _, name := range names { for _, name := range names {
matcher := matchers["link/"+name] matcher := matchers["link/"+name]
if matcher != nil { if matcher != nil {
matcher(&internalData, attrs["href"], attrs["type"]) matcher(internalData, attrs["href"], attrs["type"])
} }
} }
} }
if n.Type == html.ElementNode && n.Data == "title" { if n.Data == "title" {
c := n.FirstChild c := n.FirstChild
if c != nil && c.Type == html.TextNode { if c != nil && c.Type == html.TextNode {
matcher := matchers["title"] matcher := matchers["title"]
if matcher != nil { if matcher != nil {
matcher(&internalData, c.Data, "") matcher(internalData, c.Data, "")
}
} }
} }
} }
for c := n.FirstChild; c != nil; c = c.NextSibling { for c := n.FirstChild; c != nil; c = c.NextSibling {
parseNode(c) parseNode(matchers, internalData, c)
}
} }
internalData := InternalData{
url: base,
} }
contentType := strings.SplitN(resp.Header.Get("Content-Type"), ";", 2)[0] contentType := strings.SplitN(resp.Header.Get("Content-Type"), ";", 2)[0]
...@@ -463,38 +587,97 @@ func main() { ...@@ -463,38 +587,97 @@ func main() {
contentType == "application/xhtml" || contentType == "application/xhtml" ||
contentType == "application/xml" { contentType == "application/xml" {
doc, _ := html.Parse(resp.Body) doc, _ := html.Parse(resp.Body)
parseNode(doc) parseNode(matchers, &internalData, doc)
err = resp.Body.Close() err = resp.Body.Close()
if err != nil { if err != nil {
panic(err) panic(err)
} }
} else if strings.HasPrefix(contentType, "image/") {
internalData.rawType = "image"
} else if strings.HasPrefix(contentType, "video/") {
internalData.rawType = "video"
} else if strings.HasPrefix(contentType, "audio/") {
internalData.rawType = "audio"
} }
var oEmbedData OEmbed var oEmbedData OEmbed
if internalData.oembed_url != "" { if internalData.oembedUrl != "" {
fmt.Printf("Searching for %s\n", internalData.oembed_url) fmt.Printf("Searching for %s\n", internalData.oembedUrl)
resp, err := client.Get(internalData.oembed_url) resp, err := client.Get(internalData.oembedUrl)
if err == nil { if err == nil {
err = json.NewDecoder(resp.Body).Decode(&oEmbedData) err = json.NewDecoder(resp.Body).Decode(&oEmbedData)
} else {
fmt.Println(err.Error())
}
err = resp.Body.Close()
if err != nil {
fmt.Println(err.Error())
}
}
siteExists := func(siteUrl string) bool {
resp, err := client.Get(siteUrl)
if err == nil && resp.StatusCode == 200 {
return true
} else {
return false
} }
resp.Body.Close()
} }
data = buildData(internalData, oEmbedData) providerFallback1 := func(internalData *InternalData) {
providerUrl := resolve(internalData.url, "/")
fmt.Printf("Searching for %s\n", providerUrl)
providerResp, err := client.Get(providerUrl)
if err == nil {
contentType := strings.SplitN(providerResp.Header.Get("Content-Type"), ";", 2)[0]
if contentType == "text/html" ||
contentType == "application/xhtml+xml" ||
contentType == "application/xhtml" ||
contentType == "application/xml" {
doc, _ := html.Parse(providerResp.Body)
parseNode(providerMatchers, internalData, doc)
err = providerResp.Body.Close()
if err != nil {
panic(err)
}
}
}
}
providerFallback2 := func(internalData *InternalData) {
providerFaviconUrl := resolve(internalData.url, "/favicon.png")
fmt.Printf("Searching for %s\n", providerFaviconUrl)
if siteExists(providerFaviconUrl) {
internalData.urlProviderIconPng = providerFaviconUrl
}
}
providerFallback3 := func(internalData *InternalData) {
providerFaviconUrl := resolve(internalData.url, "/favicon.ico")
fmt.Printf("Searching for %s\n", providerFaviconUrl)
if siteExists(providerFaviconUrl) {
internalData.urlProviderIconIco = providerFaviconUrl
}
}
data = buildData(internalData, oEmbedData, []func(*InternalData){providerFallback1, providerFallback2, providerFallback3})
return data, err return data, err
} }
http.HandleFunc("/", func(w http.ResponseWriter, r *http.Request) { http.HandleFunc("/", func(w http.ResponseWriter, r *http.Request) {
url := strings.TrimSpace(r.URL.Query().Get("url")) requestedUrl := strings.TrimSpace(r.URL.Query().Get("url"))
if url != "" { if requestedUrl != "" {
data, err := loadData(url) data, err := loadData(requestedUrl)
if err != nil { if err != nil {
panic(err.Error()) fmt.Println(err.Error())
return
} }
err = returnJson(w, data) err = returnJson(w, data)
if err != nil { if err != nil {
panic(err) fmt.Println(err.Error())
return
} }
} }
}) })
...@@ -503,11 +686,4 @@ func main() { ...@@ -503,11 +686,4 @@ func main() {
if err != nil { if err != nil {
panic(err) panic(err)
} }
/*
loadData("https://medium.com/slack-developer-blog/everything-you-ever-wanted-to-know-about-unfurling-but-were-afraid-to-ask-or-how-to-make-your-e64b4bb9254")
loadData("http://harvard.edu")
loadData("https://twitter.com/dw_politik/status/1092872739445104640")
loadData("https://twitter.com/raketenlurch/status/1093991675209416704")
*/
} }
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or sign in to comment