Skip to content
Snippets Groups Projects
Verified Commit c2c3f90b authored by Janne Mareike Koschinski's avatar Janne Mareike Koschinski
Browse files

Initial commit

parents
No related branches found
No related tags found
No related merge requests found
Pipeline #404 failed
vendor/
.idea/
\ No newline at end of file
# Build stage: fetch dependencies with glide and compile a static Linux binary.
FROM golang:alpine AS go_builder
RUN apk add --no-cache curl
RUN curl https://glide.sh/get | sh
WORKDIR /go/src/app
# Copy the dependency manifests before the sources so the `glide install`
# layer is only rebuilt when the manifests change.
COPY glide.* ./
RUN glide install
COPY *.go ./
# -o names the output file explicitly; the runtime stage copies exactly this
# path, so the build must not depend on how a bare "app" argument is resolved.
RUN CGO_ENABLED=0 GOOS=linux go build -a -o app .

# Runtime stage: only the CA bundle (needed to verify HTTPS peers) and the binary.
FROM scratch
COPY --from=go_builder /etc/ssl/certs/ca-certificates.crt /etc/ssl/certs/
COPY --from=go_builder /go/src/app/app /app
ENTRYPOINT ["/app"]
\ No newline at end of file
#!/bin/sh
# Builds the embed-helper image, tags it with the current `git describe`
# value and with `latest`, then pushes both tags.

# Stop at the first failing command (or unset variable) so that a failed
# build is never followed by tagging or pushing a stale image.
set -eu

IMAGE=k8r.eu/justjanne/embed-helper
TAGS=$(git describe --always --tags HEAD)

docker build -t "$IMAGE:$TAGS" .
docker tag "$IMAGE:$TAGS" "$IMAGE:latest"
echo "Successfully tagged $IMAGE:latest"
docker push "$IMAGE:$TAGS"
docker push "$IMAGE:latest"
#!/bin/sh
# Points the embed-helper deployment at the image tagged with the current
# `git describe` value.

# Stop if `git describe` fails (or a variable is unset) instead of handing
# kubectl an image reference with an empty tag.
set -eu

IMAGE=k8r.eu/justjanne/embed-helper
TAGS=$(git describe --always --tags HEAD)
DEPLOYMENT=embed-helper
POD=embed-helper

kubectl set image "deployment/$DEPLOYMENT" "$POD=$IMAGE:$TAGS"
hash: 580579292d28074fd3d1d1dc8839c97e8dad3795380023d934db98c6e8d47158
updated: 2019-02-04T17:02:06.067158062+01:00
imports:
- name: github.com/andybalholm/cascadia
version: 680b6a57bda4f657485ad44bdea42342ead737bc
- name: github.com/dyatlov/go-htmlinfo
version: d9417c75de65fbae5b16f4038b421c9e1cc61f0d
subpackages:
- htmlinfo
- name: github.com/dyatlov/go-oembed
version: 4bc5ab7a42e91e45e957f9b0c47f911efed16710
subpackages:
- oembed
- name: github.com/dyatlov/go-opengraph
version: 816b6608b3c8c1e871bc9cf777f390e2532081fe
subpackages:
- opengraph
- name: github.com/dyatlov/go-readability
version: e7b2080f87f824caa519ed33dac6d4e4e413616d
- name: github.com/Jeffail/gabs
version: 7a0fed31069aba77993a518cc2f37b28ee7aa883
- name: github.com/PuerkitoBio/goquery
version: 3dcf72e6c17f694381a21592651ca1464ded0e10
- name: golang.org/x/net
version: bc3663df0ac92f928d419e31e0d2af22e683a5a2
subpackages:
- html
- html/atom
- html/charset
- name: golang.org/x/text
version: f28f36722d5ef2f9655ad3de1f248e3e52ad5ebd
subpackages:
- encoding
- encoding/charmap
- encoding/htmlindex
- encoding/internal
- encoding/internal/identifier
- encoding/japanese
- encoding/korean
- encoding/simplifiedchinese
- encoding/traditionalchinese
- encoding/unicode
- internal/tag
- internal/utf8internal
- language
- runes
- transform
testImports: []
package: git.kuschku.de/justjanne/embed-helper
import:
- package: github.com/dyatlov/go-htmlinfo/htmlinfo
\ No newline at end of file
main.go 0 → 100644
package main
import (
"encoding/json"
"fmt"
"golang.org/x/net/html"
"net/http"
"net/url"
"strings"
)
// returnJson encodes data as JSON and writes it to w as the response body,
// declaring the body as application/json.
//
// It returns the marshalling error, in which case nothing has been written
// to w, or otherwise the error reported by w.Write.
func returnJson(w http.ResponseWriter, data interface{}) error {
	marshalled, err := json.Marshal(data)
	if err != nil {
		return err
	}

	// Set replaces any Content-Type chosen earlier; Add would send a second,
	// conflicting Content-Type header alongside it.
	w.Header().Set("Content-Type", "application/json")

	_, err = w.Write(marshalled)
	return err
}
// InternalData holds the raw metadata strings collected for one page. Each
// field stores the value of the tag it is named after; the fields are filled
// in while the parsed document is walked and are later read by buildData.
type InternalData struct {
	// Address of the page the metadata belongs to.
	url string

	// <meta> twitter:card, twitter:site[:id], twitter:creator[:id].
	twitter_card, twitter_site, twitter_site_id, twitter_creator, twitter_creator_id string

	// <meta> twitter:description, twitter:title, twitter:image[:src], twitter:image:alt.
	twitter_description, twitter_title, twitter_image, twitter_image_alt string

	// <meta> twitter:player and its width, height and stream.
	twitter_player, twitter_player_width, twitter_player_height, twitter_player_stream string

	// <meta> twitter:app:{name,id,url}:iphone.
	twitter_app_name_iphone, twitter_app_id_iphone, twitter_app_url_iphone string

	// <meta> twitter:app:{name,id,url}:ipad.
	twitter_app_name_ipad, twitter_app_id_ipad, twitter_app_url_ipad string

	// <meta> twitter:app:{name,id,url}:googleplay.
	twitter_app_name_googleplay, twitter_app_id_googleplay, twitter_app_url_googleplay string

	// <meta> twitter:label1/data1 and twitter:label2/data2.
	twitter_label1, twitter_data1, twitter_label2, twitter_data2 string

	// <meta> og:url, og:title, og:description, og:type, og:locale.
	og_url, og_title, og_description, og_type, og_locale string

	// <meta> og:video and its sub-properties.
	og_video, og_video_url, og_video_secure_url, og_video_type, og_video_width, og_video_height string

	// <meta> og:image and its sub-properties.
	og_image, og_image_url, og_image_secure_url, og_image_type, og_image_width, og_image_height string

	// <meta> og:audio and its sub-properties.
	og_audio, og_audio_url, og_audio_secure_url, og_audio_type string

	// <meta> og:site_name.
	og_site_name string

	// <meta> article:author and article:published_time.
	article_author, article_published_time string

	// <meta> title, description, author and theme-color.
	meta_title, meta_description, meta_author, meta_theme_color string

	// <link> href values for rel="icon" and rel="author".
	link_favicon, link_author string

	// Text of the <title> element.
	title string
}
// Output types: the JSON document emitted for a page.
//
// NOTE(review): the key names look like the Slack message-attachment format;
// confirm against the consumer before relying on that.
type (
	// DataField is one titled value in Data.Fields.
	DataField struct {
		Title string `json:"title"`
		Value string `json:"value"`
		Short bool   `json:"short"`
	}

	// Data is the result assembled by buildData and serialised to JSON.
	Data struct {
		Color      string      `json:"color"`
		AuthorName string      `json:"author_name"`
		AuthorLink string      `json:"author_link"`
		AuthorIcon string      `json:"author_icon"`
		Title      string      `json:"title"`
		TitleLink  string      `json:"title_link"`
		Text       string      `json:"text"`
		Fields     []DataField `json:"fields"`
		ImageUrl   string      `json:"image_url"`
		ThumbUrl   string      `json:"thumb_url"`
		Footer     string      `json:"footer"`
		FooterIcon string      `json:"footer_icon"`
		Ts         int         `json:"ts"`
	}
)
// coalesce returns the first entry of list that is still non-empty after
// surrounding whitespace is removed; the trimmed form is what is returned.
// It returns "" when list is empty or contains only blank entries.
func coalesce(list []string) string {
	for i := 0; i < len(list); i++ {
		if trimmed := strings.TrimSpace(list[i]); trimmed != "" {
			return trimmed
		}
	}
	return ""
}
// buildData condenses the raw metadata in `in` into the output document.
//
// For each output field the first non-blank candidate wins (see coalesce).
// Link and image values are resolved against in.url, so relative references
// come out absolute; they come out empty when in.url does not parse or the
// reference itself is unparsable.
func buildData(in InternalData) (out Data) {
	// The parse error is deliberately dropped: a nil pageURL simply makes
	// every resolved reference empty.
	pageURL, _ := url.ParseRequestURI(in.url)

	toAbsolute := func(ref string) string {
		if pageURL == nil || ref == "" {
			return ""
		}
		parsed, err := url.Parse(ref)
		if err != nil {
			return ""
		}
		return pageURL.ResolveReference(parsed).String()
	}

	// The Twitter image is the preferred candidate in exactly one of the two
	// slots, chosen by the card type; og:image and the favicon are the
	// respective fallbacks.
	large := []string{in.og_image}
	small := []string{in.link_favicon}
	switch in.twitter_card {
	case "summary", "summary_large_image":
		large = []string{in.twitter_image, in.og_image}
	default:
		small = []string{in.twitter_image, in.link_favicon}
	}

	out = Data{
		TitleLink:  in.url,
		Title:      coalesce([]string{in.twitter_title, in.og_title, in.meta_title, in.title}),
		Text:       coalesce([]string{in.twitter_description, in.og_description, in.meta_description}),
		AuthorName: coalesce([]string{in.meta_author, in.og_site_name}),
		AuthorLink: toAbsolute(coalesce([]string{in.article_author, in.link_author})),
		Color:      coalesce([]string{in.meta_theme_color}),
		ThumbUrl:   toAbsolute(coalesce(small)),
		ImageUrl:   toAbsolute(coalesce(large)),
	}
	return out
}
// metadataTargets maps every recognised lookup key to the field of d that
// stores its value. Keys have the form "meta/<name or property>" for <meta>
// elements, "link/<rel token>" for <link> elements, and "title" for the
// <title> element. twitter:image and twitter:image:src share one field.
func metadataTargets(d *InternalData) map[string]*string {
	return map[string]*string{
		// Twitter cards.
		"meta/twitter:card":                &d.twitter_card,
		"meta/twitter:site":                &d.twitter_site,
		"meta/twitter:site:id":             &d.twitter_site_id,
		"meta/twitter:creator":             &d.twitter_creator,
		"meta/twitter:creator:id":          &d.twitter_creator_id,
		"meta/twitter:description":         &d.twitter_description,
		"meta/twitter:title":               &d.twitter_title,
		"meta/twitter:image":               &d.twitter_image,
		"meta/twitter:image:src":           &d.twitter_image,
		"meta/twitter:image:alt":           &d.twitter_image_alt,
		"meta/twitter:player":              &d.twitter_player,
		"meta/twitter:player:width":        &d.twitter_player_width,
		"meta/twitter:player:height":       &d.twitter_player_height,
		"meta/twitter:player:stream":       &d.twitter_player_stream,
		"meta/twitter:app:name:iphone":     &d.twitter_app_name_iphone,
		"meta/twitter:app:id:iphone":       &d.twitter_app_id_iphone,
		"meta/twitter:app:url:iphone":      &d.twitter_app_url_iphone,
		"meta/twitter:app:name:ipad":       &d.twitter_app_name_ipad,
		"meta/twitter:app:id:ipad":         &d.twitter_app_id_ipad,
		"meta/twitter:app:url:ipad":        &d.twitter_app_url_ipad,
		"meta/twitter:app:name:googleplay": &d.twitter_app_name_googleplay,
		"meta/twitter:app:id:googleplay":   &d.twitter_app_id_googleplay,
		"meta/twitter:app:url:googleplay":  &d.twitter_app_url_googleplay,
		"meta/twitter:label1":              &d.twitter_label1,
		"meta/twitter:data1":               &d.twitter_data1,
		"meta/twitter:label2":              &d.twitter_label2,
		"meta/twitter:data2":               &d.twitter_data2,

		// Open Graph.
		"meta/og:url":              &d.og_url,
		"meta/og:title":            &d.og_title,
		"meta/og:description":      &d.og_description,
		"meta/og:type":             &d.og_type,
		"meta/og:locale":           &d.og_locale,
		"meta/og:video":            &d.og_video,
		"meta/og:video:url":        &d.og_video_url,
		"meta/og:video:secure_url": &d.og_video_secure_url,
		"meta/og:video:type":       &d.og_video_type,
		"meta/og:video:width":      &d.og_video_width,
		"meta/og:video:height":     &d.og_video_height,
		"meta/og:image":            &d.og_image,
		"meta/og:image:url":        &d.og_image_url,
		"meta/og:image:secure_url": &d.og_image_secure_url,
		"meta/og:image:type":       &d.og_image_type,
		"meta/og:image:width":      &d.og_image_width,
		"meta/og:image:height":     &d.og_image_height,
		"meta/og:audio":            &d.og_audio,
		"meta/og:audio:url":        &d.og_audio_url,
		"meta/og:audio:secure_url": &d.og_audio_secure_url,
		"meta/og:audio:type":       &d.og_audio_type,
		"meta/og:site_name":        &d.og_site_name,

		// Article properties.
		"meta/article:author":         &d.article_author,
		"meta/article:published_time": &d.article_published_time,

		// Plain HTML metadata.
		"meta/title":       &d.meta_title,
		"meta/description": &d.meta_description,
		"meta/author":      &d.meta_author,
		"meta/theme-color": &d.meta_theme_color,

		// <link> relations and the document title.
		"link/icon":   &d.link_favicon,
		"link/author": &d.link_author,
		"title":       &d.title,
	}
}

// attributes returns the attributes of n keyed by attribute name. If a name
// occurs more than once, the last occurrence wins.
func attributes(n *html.Node) map[string]string {
	attrs := make(map[string]string, len(n.Attr))
	for _, attr := range n.Attr {
		attrs[attr.Key] = attr.Val
	}
	return attrs
}

// collectMetadata walks the tree rooted at n depth-first and stores the value
// of every recognised <meta>, <link> and <title> element through targets.
// When a key occurs several times in the document, the last occurrence wins.
func collectMetadata(n *html.Node, targets map[string]*string) {
	if n.Type == html.ElementNode {
		switch n.Data {
		case "meta":
			attrs := attributes(n)
			// The key may be given as either name="..." or property="...".
			if name := coalesce([]string{attrs["name"], attrs["property"]}); name != "" {
				if target, ok := targets["meta/"+name]; ok {
					*target = attrs["content"]
				}
			}
		case "link":
			attrs := attributes(n)
			// rel holds whitespace-separated, ASCII case-insensitive tokens,
			// so split on any whitespace and compare in lower case.
			for _, rel := range strings.Fields(attrs["rel"]) {
				if target, ok := targets["link/"+strings.ToLower(rel)]; ok {
					*target = attrs["href"]
				}
			}
		case "title":
			// Namespace is empty for HTML elements. Skipping foreign
			// elements keeps a <title> inside inline SVG or MathML from
			// overwriting the page title.
			if c := n.FirstChild; n.Namespace == "" && c != nil && c.Type == html.TextNode {
				if target, ok := targets["title"]; ok {
					*target = c.Data
				}
			}
		}
	}
	for c := n.FirstChild; c != nil; c = c.NextSibling {
		collectMetadata(c, targets)
	}
}

// printEmbed fetches pageURL with client, extracts its metadata and prints
// both the raw InternalData and the JSON produced by buildData to stdout.
//
// It returns an error when the request, the HTML parse, closing the response
// body, or the JSON encoding fails.
//
// NOTE(review): the HTTP status code is not inspected, so error pages are
// parsed like any other document.
func printEmbed(client *http.Client, pageURL string) error {
	fmt.Printf("Searching for %s\n", pageURL)

	resp, err := client.Get(pageURL)
	if err != nil {
		return fmt.Errorf("fetching %s: %v", pageURL, err)
	}

	// Close the body right after parsing so it is released on every path,
	// including a failed parse.
	doc, parseErr := html.Parse(resp.Body)
	closeErr := resp.Body.Close()
	if parseErr != nil {
		return fmt.Errorf("parsing %s: %v", pageURL, parseErr)
	}
	if closeErr != nil {
		return fmt.Errorf("closing response body of %s: %v", pageURL, closeErr)
	}

	internalData := InternalData{url: pageURL}
	collectMetadata(doc, metadataTargets(&internalData))

	marshalled, err := json.Marshal(buildData(internalData))
	if err != nil {
		return fmt.Errorf("encoding result for %s: %v", pageURL, err)
	}

	fmt.Printf("%+v\n", internalData)
	fmt.Println(string(marshalled))
	return nil
}

// main runs the extractor against a fixed set of sample pages and panics on
// the first failure. The HTTP endpoint is present but commented out.
func main() {
	// NOTE(review): a zero-value http.Client has no timeout, so a stalled
	// server blocks the run indefinitely; consider setting Client.Timeout.
	client := &http.Client{}

	loadData := func(pageURL string) {
		if err := printEmbed(client, pageURL); err != nil {
			panic(err)
		}
	}

	/*
		http.HandleFunc("/", func(w http.ResponseWriter, r *http.Request) {
			loadData(strings.TrimSpace(r.URL.Query().Get("url")))
		})
		err := http.ListenAndServe(":8080", nil)
		if err != nil {
			panic(err)
		}
	*/

	loadData("https://medium.com/slack-developer-blog/everything-you-ever-wanted-to-know-about-unfurling-but-were-afraid-to-ask-or-how-to-make-your-e64b4bb9254")
	loadData("http://harvard.edu")
	loadData("https://twitter.com/dw_politik/status/1092872739445104640")
}
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or sign in to comment