diff --git a/api/app/archive.go b/api/app/archive.go index 78ea17f..ac4e0a0 100644 --- a/api/app/archive.go +++ b/api/app/archive.go @@ -20,22 +20,31 @@ func downloadAssetFile(url string) ([]byte, error) { return io.ReadAll(resp.Body) } -// FetchArticleHTML returns page's title, html and error -func FetchArticleHTML(urlToFetch string) (string, string, error) { +type page struct { + Title string + Description string + Body string + ImageURL string +} + +// FetchArticleHTML returns page struct and error +func FetchArticleHTML(urlToFetch string) (page, error) { + page := page{} + url, err := url.Parse(urlToFetch) if err != nil { - return "", "", err + return page, err } resp, err := http.Get(url.String()) if err != nil { - return "", "", err + return page, err } defer resp.Body.Close() doc, err := goquery.NewDocumentFromReader(resp.Body) if err != nil { - return "", "", err + return page, err } doc.Find(`head link[rel="stylesheet"]`).Each(func(i int, s *goquery.Selection) { @@ -65,12 +74,59 @@ func FetchArticleHTML(urlToFetch string) (string, string, error) { } }) + doc.Find("img").Each(func(i int, s *goquery.Selection) { + src, ok := s.Attr("src") + if !ok || src == "" { + return + } + + if strings.HasPrefix(src, "/") { + s.SetAttr("src", "https://"+url.Hostname()+src) + } + }) + doc.Find("script").Each(func(i int, s *goquery.Selection) { s.Remove() }) - title := doc.Find("head title").Text() + page.Title = doc.Find("head title").Text() + page.Body, _ = doc.Html() - html, _ := doc.Html() - return title, html, err + page.Description = doc.Find(`head meta[name="description"]`).AttrOr("content", "") + if page.Description == "" { + page.Description = doc.Find(`head meta[property="og:description"]`).AttrOr("content", "") + } + + page.ImageURL = doc.Find(`head meta[property="og:image"]`).AttrOr("content", "") + if page.ImageURL == "" { + doc.Find("img").Each(func(i int, s *goquery.Selection) { + if page.ImageURL != "" { + return + } + + src, exists := s.Attr("src") + if !exists { + return + } + + u, err := url.Parse(src) + if err != nil { + return + } + + allowedTypes := []string{"png", "jpg", "jpeg", "webp"} + isAllowed := false + for _, tp := range allowedTypes { + if strings.HasSuffix(u.Path, tp) { + isAllowed = true + } + } + + if isAllowed { + page.ImageURL = src + } + }) + } + + return page, err } diff --git a/api/main.go b/api/main.go index f482b37..f3b1d51 100644 --- a/api/main.go +++ b/api/main.go @@ -36,13 +36,15 @@ type User struct { } type Article struct { - ID int64 `json:"id" db:"id"` - Title string `json:"title" db:"title"` - URL string `json:"url" db:"url"` - Body []byte `json:"-" db:"body"` - UserID int64 `json:"-" db:"user_id"` - CreatedAt string `json:"created_at" db:"created_at"` - UpdatedAt string `json:"updated_at" db:"updated_at"` + ID int64 `json:"id" db:"id"` + Title string `json:"title" db:"title"` + URL string `json:"url" db:"url"` + Body []byte `json:"-" db:"body"` + Description *string `json:"description" db:"description"` + Image *[]byte `json:"image" db:"image"` + UserID int64 `json:"-" db:"user_id"` + CreatedAt string `json:"created_at" db:"created_at"` + UpdatedAt string `json:"updated_at" db:"updated_at"` } func readJSON(r *http.Request, s any) error { @@ -121,6 +123,15 @@ func getUserIdFromRequest(r *http.Request) (int64, error) { return userId, nil } +func downloadImage(url string) ([]byte, error) { + resp, err := http.Get(url) + if err != nil { + return nil, err + } + defer resp.Body.Close() + return io.ReadAll(resp.Body) +} + type AuthResponse struct { AccessToken string `json:"access_token"` } @@ -317,7 +328,7 @@ func main() { return } - title, html, err := app.FetchArticleHTML(body.URL) + page, err := app.FetchArticleHTML(body.URL) if err != nil { sendApiError(w, "couldn't fetch article", err, 500) return @@ -325,10 +336,19 @@ func main() { var b bytes.Buffer gw := gzip.NewWriter(&b) - gw.Write([]byte(html)) + gw.Write([]byte(page.Body)) gw.Close() - res, err := db.Exec("INSERT INTO articles (title, url, body, user_id) VALUES (?, ?, ?, ?)", title, body.URL, b.Bytes(), userId) + var imgData []byte + if page.ImageURL != "" { + imgData, err = downloadImage(page.ImageURL) + if err != nil { + sendApiError(w, "couldn't download image from url", err, 500) + return + } + } + + res, err := db.Exec("INSERT INTO articles (title, url, description, image, body, user_id) VALUES (?, ?, ?, ?, ?, ?)", page.Title, body.URL, page.Description, imgData, b.Bytes(), userId) if err != nil { sendApiError(w, "couldn't save article", err, 500) return diff --git a/web/src/api/articles/useArticlesQuery.ts b/web/src/api/articles/useArticlesQuery.ts index ffd5493..bde1e3b 100644 --- a/web/src/api/articles/useArticlesQuery.ts +++ b/web/src/api/articles/useArticlesQuery.ts @@ -5,6 +5,8 @@ export type Article = { id: number; title: string; url: string; + description: string | null; + image: string | null; created_at: string; updated_at: string; }; diff --git a/web/src/app/(home)/UserArticles.tsx b/web/src/app/(home)/UserArticles.tsx index 78df8c2..8fba256 100644 --- a/web/src/app/(home)/UserArticles.tsx +++ b/web/src/app/(home)/UserArticles.tsx @@ -5,6 +5,7 @@ import { SaveArticleForm } from "./SaveArticleForm"; import { ArticleDrawer } from "@/app/(home)/ArticleDrawer"; import { Button } from "@/components/ui/Button"; import { useDeleteArticleMutation } from "@/api/articles/useDeleteArticleMutation"; +import Image from "next/image"; export const UserArticles = () => { const { data: articles } = useArticlesQuery(); @@ -19,12 +20,29 @@ export const UserArticles = () => {
{articles?.map((article) => ( -
- -

- {article.title} -

-
+
+
+ {article.image && ( +
+ +
+ )} +
+ +

+ {article.title} +

+
+ {article.description && ( +

{article.description}

+ )} +
+