77 lines
1.4 KiB
Go
77 lines
1.4 KiB
Go
package app
|
|
|
|
import (
|
|
"fmt"
|
|
"io"
|
|
"net/http"
|
|
"net/url"
|
|
"strings"
|
|
|
|
"github.com/PuerkitoBio/goquery"
|
|
)
|
|
|
|
// downloadAssetFile fetches url with an HTTP GET and returns the raw response
// body.
//
// An error is returned when the request fails, when the server answers with a
// non-2xx status, or when the body cannot be read completely.
func downloadAssetFile(url string) ([]byte, error) {
	resp, err := http.Get(url)
	if err != nil {
		return nil, err
	}
	defer resp.Body.Close()

	// Without this check an error page (404, 500, ...) would be handed back to
	// the caller as if it were the asset's content.
	if resp.StatusCode < http.StatusOK || resp.StatusCode >= http.StatusMultipleChoices {
		return nil, fmt.Errorf("downloading %q: unexpected status %s", url, resp.Status)
	}

	return io.ReadAll(resp.Body)
}
|
|
|
|
// FetchArticleHTML returns page's title, html and error
|
|
func FetchArticleHTML(urlToFetch string) (string, string, error) {
|
|
url, err := url.Parse(urlToFetch)
|
|
if err != nil {
|
|
return "", "", err
|
|
}
|
|
|
|
resp, err := http.Get(url.String())
|
|
if err != nil {
|
|
return "", "", err
|
|
}
|
|
defer resp.Body.Close()
|
|
|
|
doc, err := goquery.NewDocumentFromReader(resp.Body)
|
|
if err != nil {
|
|
return "", "", err
|
|
}
|
|
|
|
doc.Find(`head link[rel="stylesheet"]`).Each(func(i int, s *goquery.Selection) {
|
|
v, ok := s.Attr("href")
|
|
if !ok || v == "" {
|
|
return
|
|
}
|
|
|
|
if strings.HasPrefix(v, "/") {
|
|
styles, err := downloadAssetFile(fmt.Sprintf("https://%s%s", url.Hostname(), v))
|
|
if err != nil {
|
|
return
|
|
}
|
|
doc.Find("head").AppendHtml("<style>" + string(styles) + "</style>")
|
|
s.Remove()
|
|
}
|
|
})
|
|
|
|
doc.Find("a").Each(func(i int, s *goquery.Selection) {
|
|
href, ok := s.Attr("href")
|
|
if !ok || href == "" {
|
|
return
|
|
}
|
|
|
|
if strings.HasPrefix(href, "/") {
|
|
s.SetAttr("href", "https://"+url.Hostname()+href)
|
|
}
|
|
})
|
|
|
|
doc.Find("script").Each(func(i int, s *goquery.Selection) {
|
|
s.Remove()
|
|
})
|
|
|
|
title := doc.Find("head title").Text()
|
|
|
|
html, _ := doc.Html()
|
|
return title, html, err
|
|
}
|