add description & image for articles
This commit is contained in:
@@ -20,22 +20,31 @@ func downloadAssetFile(url string) ([]byte, error) {
|
||||
return io.ReadAll(resp.Body)
|
||||
}
|
||||
|
||||
// FetchArticleHTML returns page's title, html and error
|
||||
func FetchArticleHTML(urlToFetch string) (string, string, error) {
|
||||
// page holds what FetchArticleHTML extracts from a fetched article.
type page struct {
	// Title, Description and Body carry the article's title, summary text
	// and HTML markup; ImageURL is the address of its image and may be empty.
	Title, Description, Body, ImageURL string
}
|
||||
|
||||
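// FetchArticleHTML downloads urlToFetch and returns a page holding its title, description, processed HTML body and image URL, or an error if the URL cannot be parsed, fetched or read as HTML.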
|
||||
func FetchArticleHTML(urlToFetch string) (page, error) {
|
||||
page := page{}
|
||||
|
||||
url, err := url.Parse(urlToFetch)
|
||||
if err != nil {
|
||||
return "", "", err
|
||||
return page, err
|
||||
}
|
||||
|
||||
resp, err := http.Get(url.String())
|
||||
if err != nil {
|
||||
return "", "", err
|
||||
return page, err
|
||||
}
|
||||
defer resp.Body.Close()
|
||||
|
||||
doc, err := goquery.NewDocumentFromReader(resp.Body)
|
||||
if err != nil {
|
||||
return "", "", err
|
||||
return page, err
|
||||
}
|
||||
|
||||
doc.Find(`head link[rel="stylesheet"]`).Each(func(i int, s *goquery.Selection) {
|
||||
@@ -65,12 +74,59 @@ func FetchArticleHTML(urlToFetch string) (string, string, error) {
|
||||
}
|
||||
})
|
||||
|
||||
doc.Find("img").Each(func(i int, s *goquery.Selection) {
|
||||
src, ok := s.Attr("src")
|
||||
if !ok || src == "" {
|
||||
return
|
||||
}
|
||||
|
||||
if strings.HasPrefix(src, "/") {
|
||||
s.SetAttr("src", "https://"+url.Hostname()+src)
|
||||
}
|
||||
})
|
||||
|
||||
doc.Find("script").Each(func(i int, s *goquery.Selection) {
|
||||
s.Remove()
|
||||
})
|
||||
|
||||
title := doc.Find("head title").Text()
|
||||
page.Title = doc.Find("head title").Text()
|
||||
page.Body, _ = doc.Html()
|
||||
|
||||
html, _ := doc.Html()
|
||||
return title, html, err
|
||||
page.Description = doc.Find(`head meta[name="description"]`).AttrOr("content", "")
|
||||
if page.Description == "" {
|
||||
page.Description = doc.Find(`head meta[property="og:description"]`).AttrOr("content", "")
|
||||
}
|
||||
|
||||
page.ImageURL = doc.Find(`head meta[property="og:image"]`).AttrOr("content", "")
|
||||
if page.ImageURL == "" {
|
||||
doc.Find("img").Each(func(i int, s *goquery.Selection) {
|
||||
if page.ImageURL != "" {
|
||||
return
|
||||
}
|
||||
|
||||
src, exists := s.Attr("src")
|
||||
if !exists {
|
||||
return
|
||||
}
|
||||
|
||||
u, err := url.Parse(src)
|
||||
if err != nil {
|
||||
return
|
||||
}
|
||||
|
||||
allowedTypes := []string{"png", "jpg", "jpeg", "webp"}
|
||||
isAllowed := false
|
||||
for _, tp := range allowedTypes {
|
||||
if strings.HasSuffix(u.Path, tp) {
|
||||
isAllowed = true
|
||||
}
|
||||
}
|
||||
|
||||
if isAllowed {
|
||||
page.ImageURL = src
|
||||
}
|
||||
})
|
||||
}
|
||||
|
||||
return page, err
|
||||
}
|
||||
|
||||
40
api/main.go
40
api/main.go
@@ -36,13 +36,15 @@ type User struct {
|
||||
}
|
||||
|
||||
// Article is a saved article. The db tags name the columns each field is
// read from or written to, and the json tags control what the API exposes:
// Body and UserID are never serialized. Description and Image are pointers,
// presumably because their columns are nullable (confirm against the
// schema); a nil pointer is serialized as JSON null.
type Article struct {
	ID          int64   `json:"id" db:"id"`
	Title       string  `json:"title" db:"title"`
	URL         string  `json:"url" db:"url"`
	Body        []byte  `json:"-" db:"body"`
	Description *string `json:"description" db:"description"`
	Image       *[]byte `json:"image" db:"image"`
	UserID      int64   `json:"-" db:"user_id"`
	CreatedAt   string  `json:"created_at" db:"created_at"`
	UpdatedAt   string  `json:"updated_at" db:"updated_at"`
}
|
||||
|
||||
func readJSON(r *http.Request, s any) error {
|
||||
@@ -121,6 +123,15 @@ func getUserIdFromRequest(r *http.Request) (int64, error) {
|
||||
return userId, nil
|
||||
}
|
||||
|
||||
// imageStatusError reports a non-2xx HTTP response received while
// downloading an image.
type imageStatusError struct {
	status string // the response's Status text, e.g. "404 Not Found"
}

// Error implements the error interface.
func (e *imageStatusError) Error() string {
	return "unexpected response status: " + e.status
}

// downloadImage fetches url with an HTTP GET and returns the response body.
//
// It returns an error when the request fails, when the server answers with a
// non-2xx status (so an error page is never mistaken for image data), or when
// reading the body fails.
func downloadImage(url string) ([]byte, error) {
	resp, err := http.Get(url)
	if err != nil {
		return nil, err
	}
	defer resp.Body.Close()

	// Reject error responses before buffering: their body is not the image.
	if resp.StatusCode < http.StatusOK || resp.StatusCode >= http.StatusMultipleChoices {
		return nil, &imageStatusError{status: resp.Status}
	}

	return io.ReadAll(resp.Body)
}
|
||||
|
||||
// AuthResponse is a JSON payload carrying an access token, serialized under
// the "access_token" key.
type AuthResponse struct {
	AccessToken string `json:"access_token"`
}
|
||||
@@ -317,7 +328,7 @@ func main() {
|
||||
return
|
||||
}
|
||||
|
||||
title, html, err := app.FetchArticleHTML(body.URL)
|
||||
page, err := app.FetchArticleHTML(body.URL)
|
||||
if err != nil {
|
||||
sendApiError(w, "couldn't fetch article", err, 500)
|
||||
return
|
||||
@@ -325,10 +336,19 @@ func main() {
|
||||
|
||||
var b bytes.Buffer
|
||||
gw := gzip.NewWriter(&b)
|
||||
gw.Write([]byte(html))
|
||||
gw.Write([]byte(page.Body))
|
||||
gw.Close()
|
||||
|
||||
res, err := db.Exec("INSERT INTO articles (title, url, body, user_id) VALUES (?, ?, ?, ?)", title, body.URL, b.Bytes(), userId)
|
||||
var imgData []byte
|
||||
if page.ImageURL != "" {
|
||||
imgData, err = downloadImage(page.ImageURL)
|
||||
if err != nil {
|
||||
sendApiError(w, "couldn't download image from url", err, 500)
|
||||
return
|
||||
}
|
||||
}
|
||||
|
||||
res, err := db.Exec("INSERT INTO articles (title, url, description, image, body, user_id) VALUES (?, ?, ?, ?, ?, ?)", page.Title, body.URL, page.Description, imgData, b.Bytes(), userId)
|
||||
if err != nil {
|
||||
sendApiError(w, "couldn't save article", err, 500)
|
||||
return
|
||||
|
||||
Reference in New Issue
Block a user