package app

import (
	"fmt"
	"io"
	"net/http"
	"net/url"
	"strings"
	"time"

	"github.com/PuerkitoBio/goquery"
)

// fetchTimeout bounds every HTTP request issued from this file, including
// the time spent reading the response body.
const fetchTimeout = 30 * time.Second

// fetchClient is used instead of http.Get because http.DefaultClient has no
// timeout and a stalled server would otherwise block the caller forever.
var fetchClient = &http.Client{Timeout: fetchTimeout}

// httpGetOK issues a GET request and returns the response only when the
// server answered with a 2xx status. On success the caller owns resp.Body and
// must close it; on failure the body has already been closed.
func httpGetOK(rawURL string) (*http.Response, error) {
	resp, err := fetchClient.Get(rawURL)
	if err != nil {
		return nil, err
	}
	if resp.StatusCode < http.StatusOK || resp.StatusCode >= http.StatusMultipleChoices {
		resp.Body.Close()
		return nil, fmt.Errorf("GET %q: unexpected status %s", rawURL, resp.Status)
	}
	return resp, nil
}

// downloadAssetFile downloads the resource at url and returns its raw bytes.
// It returns an error when the request fails, when the server responds with
// a non-2xx status, or when the body cannot be read.
func downloadAssetFile(url string) ([]byte, error) {
	resp, err := httpGetOK(url)
	if err != nil {
		return nil, fmt.Errorf("downloading asset: %w", err)
	}
	defer resp.Body.Close()

	data, err := io.ReadAll(resp.Body)
	if err != nil {
		return nil, fmt.Errorf("reading asset %q: %w", url, err)
	}
	return data, nil
}

// resolveRef resolves ref against base and returns the absolute URL. The
// boolean is false when ref cannot be parsed as a URL reference.
func resolveRef(base *url.URL, ref string) (string, bool) {
	parsed, err := url.Parse(ref)
	if err != nil {
		return "", false
	}
	return base.ResolveReference(parsed).String(), true
}

// inlineStylesheets replaces every <link rel="stylesheet"> in <head> whose
// href starts with "/" by a <style> element holding the downloaded CSS.
// Inlining is best effort: a stylesheet that cannot be resolved or downloaded
// keeps its original <link> element.
func inlineStylesheets(doc *goquery.Document, base *url.URL) {
	head := doc.Find("head")
	doc.Find(`head link[rel="stylesheet"]`).Each(func(_ int, s *goquery.Selection) {
		href, ok := s.Attr("href")
		if !ok || !strings.HasPrefix(href, "/") {
			return
		}
		assetURL, ok := resolveRef(base, href)
		if !ok {
			return
		}
		css, err := downloadAssetFile(assetURL)
		if err != nil {
			return
		}
		// `\/` is a valid CSS escape for `/`, so this keeps the stylesheet's
		// meaning while preventing a literal "</style>" inside the CSS from
		// terminating the element early.
		safeCSS := strings.ReplaceAll(string(css), "</", `<\/`)
		head.AppendHtml("<style>" + safeCSS + "</style>")
		s.Remove()
	})
}

// absolutizeLinks rewrites every <a> whose href starts with "/" into an
// absolute URL resolved against base. Other hrefs are left untouched.
func absolutizeLinks(doc *goquery.Document, base *url.URL) {
	doc.Find("a").Each(func(_ int, s *goquery.Selection) {
		href, ok := s.Attr("href")
		if !ok || !strings.HasPrefix(href, "/") {
			return
		}
		if abs, ok := resolveRef(base, href); ok {
			s.SetAttr("href", abs)
		}
	})
}

// FetchArticleHTML downloads the page at urlToFetch and returns its title,
// its processed HTML, and an error.
//
// The returned HTML differs from the downloaded page in three ways:
// stylesheets linked from <head> with an href starting with "/" are inlined
// as <style> elements, anchors with an href starting with "/" are made
// absolute, and all <script> elements are removed. References are resolved
// against urlToFetch, so the page's own scheme, host and port are preserved.
//
// An error is returned when urlToFetch cannot be parsed, the request fails or
// yields a non-2xx status, or the document cannot be parsed or serialized.
// On error both returned strings are empty.
func FetchArticleHTML(urlToFetch string) (string, string, error) {
	pageURL, err := url.Parse(urlToFetch)
	if err != nil {
		return "", "", fmt.Errorf("parsing article URL: %w", err)
	}

	resp, err := httpGetOK(pageURL.String())
	if err != nil {
		return "", "", fmt.Errorf("fetching article: %w", err)
	}
	defer resp.Body.Close()

	doc, err := goquery.NewDocumentFromReader(resp.Body)
	if err != nil {
		return "", "", fmt.Errorf("parsing article HTML from %q: %w", urlToFetch, err)
	}

	inlineStylesheets(doc, pageURL)
	absolutizeLinks(doc, pageURL)
	doc.Find("script").Remove()

	title := doc.Find("head title").Text()
	html, err := doc.Html()
	if err != nil {
		return "", "", fmt.Errorf("serializing article HTML from %q: %w", urlToFetch, err)
	}
	return title, html, nil
}