diff --git a/.gitignore b/.gitignore index 9f33dd5af4003130062449c612da98a47d4a85ce..90a25872be567a530c20d6bef1134265dddfeda1 100644 --- a/.gitignore +++ b/.gitignore @@ -1,2 +1,3 @@ vendor/ -.idea/ \ No newline at end of file +.idea/ +*.iml diff --git a/main.go b/main.go index 6f8c16ca4c26b55906fcf31da8e33dee07cb3f12..f31dcc4211b0d0d1dde0724003d244192e2daf44 100644 --- a/main.go +++ b/main.go @@ -23,9 +23,25 @@ func returnJson(w http.ResponseWriter, data interface{}) error { return nil } +type OEmbed struct { + Url string `json:"url"` + AuthorName string `json:"author_name"` + AuthorUrl string `json:"author_url"` + Html string `json:"html"` + Width int `json:"width"` + Height int `json:"height"` + Type string `json:"type"` + CacheAge string `json:"cache_age"` + ProviderName string `json:"provider_name"` + ProviderUrl string `json:"provider_url"` + Version string `json:"version"` +} + type InternalData struct { url string + oembed_url string + twitter_card string twitter_site string twitter_site_id string @@ -108,6 +124,8 @@ type Data struct { Text string `json:"text"` Fields []DataField `json:"fields"` ImageUrl string `json:"image_url"` + LargeImage bool `json:"large_image"` + Types []string `json:"types"` ThumbUrl string `json:"thumb_url"` Footer string `json:"footer"` FooterIcon string `json:"footer_icon"` @@ -124,7 +142,7 @@ func coalesce(list []string) string { return "" } -func buildData(in InternalData) (out Data) { +func buildData(in InternalData, oEmbed OEmbed) (out Data) { base, _ := url.ParseRequestURI(in.url) resolve := func(path string) string { if path == "" || base == nil { @@ -142,9 +160,11 @@ func buildData(in InternalData) (out Data) { out.TitleLink = in.url out.Title = coalesce([]string{in.twitter_title, in.og_title, in.meta_title, in.title}) out.Text = coalesce([]string{in.twitter_description, in.og_description, in.meta_description}) - out.AuthorName = coalesce([]string{in.meta_author, in.og_site_name}) - out.AuthorLink = resolve(coalesce([]string{in.article_author, in.link_author})) + out.AuthorName = coalesce([]string{oEmbed.AuthorName, in.meta_author}) + out.AuthorLink = resolve(coalesce([]string{oEmbed.AuthorUrl, in.article_author, in.link_author})) out.Color = coalesce([]string{in.meta_theme_color}) + out.Footer = coalesce([]string{oEmbed.ProviderName, in.og_site_name}) + out.FooterIcon = resolve(coalesce([]string{in.link_favicon})) var largeImages []string var smallImages []string @@ -156,210 +176,243 @@ func buildData(in InternalData) (out Data) { largeImages = append(largeImages, in.og_image) smallImages = append(smallImages, in.link_favicon) + largeTypes := []string{ + "video", + "video.other", + "article", + "summary_large_image", + "player", + } + + isLargeImage := func (format string) bool { + for _, t := range largeTypes { + if t == format { + return true + } + } + return false + } + + out.LargeImage = isLargeImage(oEmbed.Type) || isLargeImage(in.og_type) || isLargeImage(in.twitter_card) + + types := []string{} + for _, format := range []string{oEmbed.Type, in.og_type, in.twitter_card} { + if format != "" { + types = append(types, format) + } + } + out.Types = types + out.ThumbUrl = resolve(coalesce(smallImages)) out.ImageUrl = resolve(coalesce(largeImages)) + return } func main() { client := &http.Client{} - loadData := func(url string) { - fmt.Printf("Searching for %s\n", url) - resp, err := client.Get(url) - if err != nil { - if err != nil { - panic(err) - } - return - } + matchers := map[string]func(*InternalData, string, string){} + matchers["meta/twitter:card"] = func(internalData *InternalData, content string, extra string) { + internalData.twitter_card = content + } + matchers["meta/twitter:site"] = func(internalData *InternalData, content string, extra string) { + internalData.twitter_site = content + } + matchers["meta/twitter:site:id"] = func(internalData *InternalData, content string, extra string) { + internalData.twitter_site_id = content + } + matchers["meta/twitter:creator"] = func(internalData *InternalData, content string, extra string) { + internalData.twitter_creator = content + } + matchers["meta/twitter:creator:id"] = func(internalData *InternalData, content string, extra string) { + internalData.twitter_creator_id = content + } + matchers["meta/twitter:description"] = func(internalData *InternalData, content string, extra string) { + internalData.twitter_description = content + } + matchers["meta/twitter:title"] = func(internalData *InternalData, content string, extra string) { + internalData.twitter_title = content + } + matchers["meta/twitter:image"] = func(internalData *InternalData, content string, extra string) { + internalData.twitter_image = content + } + matchers["meta/twitter:image:src"] = func(internalData *InternalData, content string, extra string) { + internalData.twitter_image = content + } + matchers["meta/twitter:image:alt"] = func(internalData *InternalData, content string, extra string) { + internalData.twitter_image_alt = content + } + matchers["meta/twitter:player"] = func(internalData *InternalData, content string, extra string) { + internalData.twitter_player = content + } + matchers["meta/twitter:player:width"] = func(internalData *InternalData, content string, extra string) { + internalData.twitter_player_width = content + } + matchers["meta/twitter:player:height"] = func(internalData *InternalData, content string, extra string) { + internalData.twitter_player_height = content + } + matchers["meta/twitter:player:stream"] = func(internalData *InternalData, content string, extra string) { + internalData.twitter_player_stream = content + } + matchers["meta/twitter:app:name:iphone"] = func(internalData *InternalData, content string, extra string) { + internalData.twitter_app_name_iphone = content + } + matchers["meta/twitter:app:id:iphone"] = func(internalData *InternalData, content string, extra string) { + internalData.twitter_app_id_iphone = content + } + matchers["meta/twitter:app:url:iphone"] = func(internalData *InternalData, content string, extra string) { + internalData.twitter_app_url_iphone = content + } + matchers["meta/twitter:app:name:ipad"] = func(internalData *InternalData, content string, extra string) { + internalData.twitter_app_name_ipad = content + } + matchers["meta/twitter:app:id:ipad"] = func(internalData *InternalData, content string, extra string) { + internalData.twitter_app_id_ipad = content + } + matchers["meta/twitter:app:url:ipad"] = func(internalData *InternalData, content string, extra string) { + internalData.twitter_app_url_ipad = content + } + matchers["meta/twitter:app:name:googleplay"] = func(internalData *InternalData, content string, extra string) { + internalData.twitter_app_name_googleplay = content + } + matchers["meta/twitter:app:id:googleplay"] = func(internalData *InternalData, content string, extra string) { + internalData.twitter_app_id_googleplay = content + } + matchers["meta/twitter:app:url:googleplay"] = func(internalData *InternalData, content string, extra string) { + internalData.twitter_app_url_googleplay = content + } + matchers["meta/twitter:label1"] = func(internalData *InternalData, content string, extra string) { + internalData.twitter_label1 = content + } + matchers["meta/twitter:data1"] = func(internalData *InternalData, content string, extra string) { + internalData.twitter_data1 = content + } + matchers["meta/twitter:label2"] = func(internalData *InternalData, content string, extra string) { + internalData.twitter_label2 = content + } + matchers["meta/twitter:data2"] = func(internalData *InternalData, content string, extra string) { + internalData.twitter_data2 = content + } - internalData := InternalData{ - url: url, - } + matchers["meta/og:url"] = func(internalData *InternalData, content string, extra string) { + internalData.og_url = content + } + matchers["meta/og:title"] = func(internalData *InternalData, content string, extra string) { + internalData.og_title = content + } + matchers["meta/og:description"] = func(internalData *InternalData, content string, extra string) { + internalData.og_description = content + } + matchers["meta/og:type"] = func(internalData *InternalData, content string, extra string) { + internalData.og_type = content + } + matchers["meta/og:locale"] = func(internalData *InternalData, content string, extra string) { + internalData.og_locale = content + } - matchers := map[string]func(string){} - matchers["meta/twitter:card"] = func(content string) { - internalData.twitter_card = content - } - matchers["meta/twitter:site"] = func(content string) { - internalData.twitter_site = content - } - matchers["meta/twitter:site:id"] = func(content string) { - internalData.twitter_site_id = content - } - matchers["meta/twitter:creator"] = func(content string) { - internalData.twitter_creator = content - } - matchers["meta/twitter:creator:id"] = func(content string) { - internalData.twitter_creator_id = content - } - matchers["meta/twitter:description"] = func(content string) { - internalData.twitter_description = content - } - matchers["meta/twitter:title"] = func(content string) { - internalData.twitter_title = content - } - matchers["meta/twitter:image"] = func(content string) { - internalData.twitter_image = content - } - matchers["meta/twitter:image:src"] = func(content string) { - internalData.twitter_image = content - } - matchers["meta/twitter:image:alt"] = func(content string) { - internalData.twitter_image_alt = content - } - matchers["meta/twitter:player"] = func(content string) { - internalData.twitter_player = content - } - matchers["meta/twitter:player:width"] = func(content string) { - internalData.twitter_player_width = content - } - matchers["meta/twitter:player:height"] = func(content string) { - internalData.twitter_player_height = content - } - matchers["meta/twitter:player:stream"] = func(content string) { - internalData.twitter_player_stream = content - } - matchers["meta/twitter:app:name:iphone"] = func(content string) { - internalData.twitter_app_name_iphone = content - } - matchers["meta/twitter:app:id:iphone"] = func(content string) { - internalData.twitter_app_id_iphone = content - } - matchers["meta/twitter:app:url:iphone"] = func(content string) { - internalData.twitter_app_url_iphone = content - } - matchers["meta/twitter:app:name:ipad"] = func(content string) { - internalData.twitter_app_name_ipad = content - } - matchers["meta/twitter:app:id:ipad"] = func(content string) { - internalData.twitter_app_id_ipad = content - } - matchers["meta/twitter:app:url:ipad"] = func(content string) { - internalData.twitter_app_url_ipad = content - } - matchers["meta/twitter:app:name:googleplay"] = func(content string) { - internalData.twitter_app_name_googleplay = content - } - matchers["meta/twitter:app:id:googleplay"] = func(content string) { - internalData.twitter_app_id_googleplay = content - } - matchers["meta/twitter:app:url:googleplay"] = func(content string) { - internalData.twitter_app_url_googleplay = content - } - matchers["meta/twitter:label1"] = func(content string) { - internalData.twitter_label1 = content - } - matchers["meta/twitter:data1"] = func(content string) { - internalData.twitter_data1 = content - } - matchers["meta/twitter:label2"] = func(content string) { - internalData.twitter_label2 = content - } - matchers["meta/twitter:data2"] = func(content string) { - internalData.twitter_data2 = content - } + matchers["meta/og:video"] = func(internalData *InternalData, content string, extra string) { + internalData.og_video = content + } + matchers["meta/og:video:url"] = func(internalData *InternalData, content string, extra string) { + internalData.og_video_url = content + } + matchers["meta/og:video:secure_url"] = func(internalData *InternalData, content string, extra string) { + internalData.og_video_secure_url = content + } + matchers["meta/og:video:type"] = func(internalData *InternalData, content string, extra string) { + internalData.og_video_type = content + } + matchers["meta/og:video:width"] = func(internalData *InternalData, content string, extra string) { + internalData.og_video_width = content + } + matchers["meta/og:video:height"] = func(internalData *InternalData, content string, extra string) { + internalData.og_video_height = content + } - matchers["meta/og:url"] = func(content string) { - internalData.og_url = content - } - matchers["meta/og:title"] = func(content string) { - internalData.og_title = content - } - matchers["meta/og:description"] = func(content string) { - internalData.og_description = content - } - matchers["meta/og:type"] = func(content string) { - internalData.og_type = content - } - matchers["meta/og:locale"] = func(content string) { - internalData.og_locale = content - } + matchers["meta/og:image"] = func(internalData *InternalData, content string, extra string) { + internalData.og_image = content + } + matchers["meta/og:image:url"] = func(internalData *InternalData, content string, extra string) { + internalData.og_image_url = content + } + matchers["meta/og:image:secure_url"] = func(internalData *InternalData, content string, extra string) { + internalData.og_image_secure_url = content + } + matchers["meta/og:image:type"] = func(internalData *InternalData, content string, extra string) { + internalData.og_image_type = content + } + matchers["meta/og:image:width"] = func(internalData *InternalData, content string, extra string) { + internalData.og_image_width = content + } + matchers["meta/og:image:height"] = func(internalData *InternalData, content string, extra string) { + internalData.og_image_height = content + } - matchers["meta/og:video"] = func(content string) { - internalData.og_video = content - } - matchers["meta/og:video:url"] = func(content string) { - internalData.og_video_url = content - } - matchers["meta/og:video:secure_url"] = func(content string) { - internalData.og_video_secure_url = content - } - matchers["meta/og:video:type"] = func(content string) { - internalData.og_video_type = content - } - matchers["meta/og:video:width"] = func(content string) { - internalData.og_video_width = content - } - matchers["meta/og:video:height"] = func(content string) { - internalData.og_video_height = content - } + matchers["meta/og:audio"] = func(internalData *InternalData, content string, extra string) { + internalData.og_audio = content + } + matchers["meta/og:audio:url"] = func(internalData *InternalData, content string, extra string) { + internalData.og_audio_url = content + } + matchers["meta/og:audio:secure_url"] = func(internalData *InternalData, content string, extra string) { + internalData.og_audio_secure_url = content + } + matchers["meta/og:audio:type"] = func(internalData *InternalData, content string, extra string) { + internalData.og_audio_type = content + } - matchers["meta/og:image"] = func(content string) { - internalData.og_image = content - } - matchers["meta/og:image:url"] = func(content string) { - internalData.og_image_url = content - } - matchers["meta/og:image:secure_url"] = func(content string) { - internalData.og_image_secure_url = content - } - matchers["meta/og:image:type"] = func(content string) { - internalData.og_image_type = content - } - matchers["meta/og:image:width"] = func(content string) { - internalData.og_image_width = content - } - matchers["meta/og:image:height"] = func(content string) { - internalData.og_image_height = content - } + matchers["meta/og:site_name"] = func(internalData *InternalData, content string, extra string) { + internalData.og_site_name = content + } - matchers["meta/og:audio"] = func(content string) { - internalData.og_audio = content - } - matchers["meta/og:audio:url"] = func(content string) { - internalData.og_audio_url = content - } - matchers["meta/og:audio:secure_url"] = func(content string) { - internalData.og_audio_secure_url = content - } - matchers["meta/og:audio:type"] = func(content string) { - internalData.og_audio_type = content - } + matchers["meta/article:author"] = func(internalData *InternalData, content string, extra string) { + internalData.article_author = content + } + matchers["meta/article:published_time"] = func(internalData *InternalData, content string, extra string) { + internalData.article_published_time = content + } - matchers["meta/og:site_name"] = func(content string) { - internalData.og_site_name = content - } + matchers["meta/title"] = func(internalData *InternalData, content string, extra string) { + internalData.meta_title = content + } + matchers["meta/description"] = func(internalData *InternalData, content string, extra string) { + internalData.meta_description = content + } + matchers["meta/author"] = func(internalData *InternalData, content string, extra string) { + internalData.meta_author = content + } + matchers["meta/theme-color"] = func(internalData *InternalData, content string, extra string) { + internalData.meta_theme_color = content + } - matchers["meta/article:author"] = func(content string) { - internalData.article_author = content - } - matchers["meta/article:published_time"] = func(content string) { - internalData.article_published_time = content + matchers["link/icon"] = func(internalData *InternalData, content string, extra string) { + internalData.link_favicon = content + } + matchers["link/author"] = func(internalData *InternalData, content string, extra string) { + internalData.link_author = content + } + matchers["link/alternate"] = func(internalData *InternalData, content string, extra string) { + if extra == "application/json+oembed" { + internalData.oembed_url = content } + } + matchers["title"] = func(internalData *InternalData, content string, extra string) { + internalData.title = content + } - matchers["meta/title"] = func(content string) { - internalData.meta_title = content - } - matchers["meta/description"] = func(content string) { - internalData.meta_description = content - } - matchers["meta/author"] = func(content string) { - internalData.meta_author = content - } - matchers["meta/theme-color"] = func(content string) { - internalData.meta_theme_color = content - } + loadData := func(url string) (Data, error) { + fmt.Printf("Searching for %s\n", url) - matchers["link/icon"] = func(content string) { - internalData.link_favicon = content - } - matchers["link/author"] = func(content string) { - internalData.link_author = content + var data Data + + resp, err := client.Get(url) + if err != nil { + return data, err } - matchers["title"] = func(content string) { - internalData.title = content + + internalData := InternalData{ + url: url, } var parseNode func(*html.Node) @@ -373,7 +426,7 @@ func main() { if name != "" { matcher := matchers["meta/"+name] if matcher != nil { - matcher(attrs["content"]) + matcher(&internalData, attrs["content"], "") } } } @@ -386,7 +439,7 @@ func main() { for _, name := range names { matcher := matchers["link/"+name] if matcher != nil { - matcher(attrs["href"]) + matcher(&internalData, attrs["href"], attrs["type"]) } } } @@ -395,7 +448,7 @@ func main() { if c != nil && c.Type == html.TextNode { matcher := matchers["title"] if matcher != nil { - matcher(c.Data) + matcher(&internalData, c.Data, "") } } } @@ -404,36 +457,57 @@ func main() { } } - doc, _ := html.Parse(resp.Body) - parseNode(doc) - err = resp.Body.Close() - if err != nil { - panic(err) + contentType := strings.SplitN(resp.Header.Get("Content-Type"), ";", 2)[0] + if contentType == "text/html" || + contentType == "application/xhtml+xml" || + contentType == "application/xhtml" || + contentType == "application/xml" { + doc, _ := html.Parse(resp.Body) + parseNode(doc) + err = resp.Body.Close() + if err != nil { + panic(err) + } } - data := buildData(internalData) + var oEmbedData OEmbed - marshalled, err := json.Marshal(data) - if err != nil { - panic(err) + if internalData.oembed_url != "" { + fmt.Printf("Searching for %s\n", internalData.oembed_url) + resp, err := client.Get(internalData.oembed_url) + if err == nil { + err = json.NewDecoder(resp.Body).Decode(&oEmbedData) + } + resp.Body.Close() } - fmt.Printf("%+v\n", internalData) - fmt.Println(string(marshalled)) + data = buildData(internalData, oEmbedData) + return data, err } - /* http.HandleFunc("/", func(w http.ResponseWriter, r *http.Request) { - loadData(strings.TrimSpace(r.URL.Query().Get("url"))) + url := strings.TrimSpace(r.URL.Query().Get("url")) + if url != "" { + data, err := loadData(url) + if err != nil { + panic(err.Error()) + } + err = returnJson(w, data) + if err != nil { + panic(err) + } + } }) err := http.ListenAndServe(":8080", nil) if err != nil { panic(err) } - */ + /* loadData("https://medium.com/slack-developer-blog/everything-you-ever-wanted-to-know-about-unfurling-but-were-afraid-to-ask-or-how-to-make-your-e64b4bb9254") loadData("http://harvard.edu") loadData("https://twitter.com/dw_politik/status/1092872739445104640") + loadData("https://twitter.com/raketenlurch/status/1093991675209416704") + */ }