This commit is contained in:
2026-02-19 14:57:33 +03:00
commit 2bb6c5a8d6
4 changed files with 408 additions and 0 deletions

321
main.go Normal file
View File

@@ -0,0 +1,321 @@
package main
import (
	"archive/tar"
	"bytes"
	"fmt"
	"io"
	"log"
	"mime"
	"net/http"
	"net/url"
	"os"
	"path"
	"strings"
	"time"

	"github.com/PuerkitoBio/goquery"
	"github.com/google/uuid"
)
// fetchUrl downloads the resource at rawURL and returns the response body.
//
// BUG FIX: the status code is now checked — previously a 404 or 500 error
// page was returned as if it were the requested asset and ended up archived
// in the tarball.
//
// NOTE(review): rawURL is attacker-controlled (it ultimately comes from the
// "url" query parameter in main), so this is an SSRF vector; consider
// validating the target host before fetching.
func fetchUrl(rawURL string) ([]byte, error) {
	resp, err := http.Get(rawURL)
	if err != nil {
		return nil, fmt.Errorf("failed to fetch url: %v", err)
	}
	defer resp.Body.Close()
	if resp.StatusCode < 200 || resp.StatusCode > 299 {
		return nil, fmt.Errorf("failed to fetch url: unexpected status %s", resp.Status)
	}
	data, err := io.ReadAll(resp.Body)
	if err != nil {
		return nil, fmt.Errorf("failed to read response: %v", err)
	}
	return data, nil
}
// TarFile is a single in-memory archive member: the entry name plus its raw
// contents.
type TarFile struct {
	Name string
	Body []byte
}

// createTarball serializes every entry of files, in order, into an
// uncompressed tar archive and returns the backing buffer. Each entry is
// written with mode 0644 and a header size derived from its body length,
// so header and payload always agree.
func createTarball(files []TarFile) (bytes.Buffer, error) {
	var out bytes.Buffer
	writer := tar.NewWriter(&out)
	for _, entry := range files {
		header := tar.Header{
			Name: entry.Name,
			Mode: 0644,
			Size: int64(len(entry.Body)),
		}
		if err := writer.WriteHeader(&header); err != nil {
			return out, fmt.Errorf("failed to write tar header: %v", err)
		}
		if _, err := writer.Write(entry.Body); err != nil {
			return out, fmt.Errorf("failed to write file to tarball: %v", err)
		}
	}
	// Close flushes the end-of-archive marker; skipping it would leave the
	// tarball truncated.
	if err := writer.Close(); err != nil {
		return out, fmt.Errorf("failed to close tar writer: %v", err)
	}
	return out, nil
}
// findAllLinks scans html for external assets referenced by the page —
// everything in <head> <link> tags, <script src>, and <img src> — downloads
// each one, and returns them as in-memory tar entries.
//
// Entries keep the original (possibly relative) reference as their Name;
// site-absolute references (leading "/") are resolved against baseUrl before
// fetching. Individual fetch failures are logged and skipped rather than
// aborting the whole scan.
//
// BUG FIX (maintainability): the fetch-and-append logic was triplicated
// verbatim across the three selectors; it is now a single shared closure.
func findAllLinks(html []byte, baseUrl string) ([]TarFile, error) {
	doc, err := goquery.NewDocumentFromReader(bytes.NewReader(html))
	if err != nil {
		return nil, fmt.Errorf("failed to create document from reader: %v", err)
	}
	files := []TarFile{}
	// collect downloads the asset referenced by the given attribute of the
	// selected element (if any) and appends it to files.
	collect := func(s *goquery.Selection, attr string) {
		ref := s.AttrOr(attr, "")
		if ref == "" {
			return
		}
		fileName := ref
		if strings.HasPrefix(ref, "/") {
			ref = baseUrl + ref
		}
		data, err := fetchUrl(ref)
		if err != nil {
			log.Printf("failed to fetch url: %v\n", err)
			return
		}
		files = append(files, TarFile{
			Name: fileName,
			Body: data,
		})
	}
	doc.Find("head link").Each(func(i int, s *goquery.Selection) { collect(s, "href") })
	doc.Find("script").Each(func(i int, s *goquery.Selection) { collect(s, "src") })
	doc.Find("img").Each(func(i int, s *goquery.Selection) { collect(s, "src") })
	return files, nil
}
// ReplaceFile names one archived asset whose reference inside the page's
// HTML should be rewritten by replaceLinks.
type ReplaceFile struct {
	// Name is the entry path as stored in the tarball — and therefore as it
	// appears in the page's src/href attributes.
	Name string
	// Url is assigned the same value as Name at its only construction site
	// (the tarball-serving handler) and is never read — presumably
	// vestigial. TODO(review): confirm and remove.
	Url string
}
// replaceLinks rewrites asset references in the HTML held in buf so they
// point at the proxied copies under baseUrl. For each known file, the
// matching <script src>, <img src> (by image extension), or <head> <link
// href> attribute is prefixed with baseUrl — but only when the reference is
// site-absolute (starts with "/"); anything else is left untouched.
//
// buf is drained while parsing and then refilled with the rewritten HTML.
func replaceLinks(buf *bytes.Buffer, files []ReplaceFile, baseUrl string) error {
	// NewDocumentFromReader consumes buf, which conveniently leaves it
	// empty so the rewritten document can be copied back into it below.
	doc, err := goquery.NewDocumentFromReader(buf)
	if err != nil {
		return fmt.Errorf("failed to create document from buffer: %v", err)
	}
	// rewrite prefixes attr of the elements matched by selector with
	// baseUrl when the current value is site-absolute.
	//
	// BUG FIX: doc.Find never returns nil, so the original `if s != nil`
	// guards were always true; an empty selection is detected via Length.
	rewrite := func(selector, attr string) {
		s := doc.Find(selector)
		if s.Length() == 0 {
			return
		}
		val := s.AttrOr(attr, "")
		if !strings.HasPrefix(val, "/") {
			return
		}
		s.SetAttr(attr, baseUrl+val)
	}
	for _, file := range files {
		// %q escapes quotes in the name; the original interpolated it raw,
		// which would break the selector on names containing `"`.
		switch path.Ext(file.Name) {
		case ".js":
			rewrite(fmt.Sprintf(`script[src=%q]`, file.Name), "src")
		case ".png", ".jpeg", ".jpg", ".svg", ".webp":
			rewrite(fmt.Sprintf(`img[src=%q]`, file.Name), "src")
		default:
			rewrite(fmt.Sprintf(`head link[href=%q]`, file.Name), "href")
		}
	}
	h, err := doc.Html()
	if err != nil {
		return fmt.Errorf("failed to get new html: %v", err)
	}
	if _, err := io.Copy(buf, bytes.NewReader([]byte(h))); err != nil {
		return fmt.Errorf("failed to overwrite buffer with new html: %v", err)
	}
	return nil
}
// main wires up two endpoints on :5000:
//
//	GET /?url=<site>  – archive <site> (html plus head links, scripts and
//	                    images) into <uuid>.tar on disk, respond with the uuid
//	GET /{uuid}/...   – serve one file out of that tarball, rewriting asset
//	                    links in index.html to go back through this server
func main() {
	mux := http.NewServeMux()
	mux.HandleFunc("GET /", func(w http.ResponseWriter, r *http.Request) {
		siteUrl := r.URL.Query().Get("url")
		if siteUrl == "" {
			http.Error(w, "url parameter is required", 400)
			return
		}
		html, err := fetchUrl(siteUrl)
		if err != nil {
			http.Error(w, fmt.Sprintf("failed to fetch html: %v", err), 500)
			return
		}
		files := []TarFile{
			{Name: "index.html", Body: html},
		}
		u, err := url.Parse(siteUrl)
		if err != nil {
			http.Error(w, fmt.Sprintf("failed to parse url: %v", err), 500)
			return
		}
		// Site-absolute asset paths ("/x.css") are resolved against the
		// scheme+host of the requested page.
		baseSiteUrl := fmt.Sprintf("%s://%s", u.Scheme, u.Host)
		otherFiles, err := findAllLinks(html, baseSiteUrl)
		if err != nil {
			http.Error(w, fmt.Sprintf("failed to find & download other files: %v", err), 500)
			return
		}
		files = append(files, otherFiles...)
		buf, err := createTarball(files)
		if err != nil {
			http.Error(w, fmt.Sprintf("failed to create tarball: %v", err), 500)
			return
		}
		uid, err := uuid.NewV7()
		if err != nil {
			http.Error(w, fmt.Sprintf("failed to generate uuid: %v", err), 500)
			return
		}
		fName := fmt.Sprintf("%s.tar", uid.String())
		// BUG FIX: replaces OpenFile + Write + deferred Close — os.WriteFile
		// is equivalent and cannot leak the descriptor on an early return.
		if err := os.WriteFile(fName, buf.Bytes(), 0644); err != nil {
			http.Error(w, fmt.Sprintf("failed to write data to tar file: %v", err), 500)
			return
		}
		fmt.Fprintf(w, "%s", uid.String())
	})
	mux.HandleFunc("GET /{uuid}/", func(w http.ResponseWriter, r *http.Request) {
		uid := r.PathValue("uuid")
		fName := fmt.Sprintf("%s.tar", uid)
		f, err := os.Open(fName)
		if err != nil {
			http.Error(w, fmt.Sprintf("failed to open tarball: %v", err), 404)
			return
		}
		defer f.Close()
		// Everything after "/<uuid>/" names a file inside the archive; the
		// bare prefix maps to index.html.
		urlFileName := "/" + strings.Join(strings.Split(r.URL.Path, "/")[2:], "/")
		if urlFileName == "/" {
			urlFileName = "index.html"
		}
		files := []ReplaceFile{}
		var buf bytes.Buffer
		tr := tar.NewReader(f)
		for {
			hdr, err := tr.Next()
			if err == io.EOF {
				break
			}
			if err != nil {
				http.Error(w, fmt.Sprintf("failed to read tarball: %v", err), 500)
				return
			}
			if hdr.Name == urlFileName {
				// BUG FIX: a failed copy used to return without writing any
				// response at all; report it instead.
				if _, err := io.Copy(&buf, tr); err != nil {
					http.Error(w, fmt.Sprintf("failed to read file from tarball: %v", err), 500)
					return
				}
			}
			files = append(files, ReplaceFile{
				Name: hdr.Name,
				Url:  hdr.Name,
			})
		}
		if urlFileName == "index.html" {
			// BUG FIX: the replaceLinks error was silently discarded.
			if err := replaceLinks(&buf, files, "/"+uid); err != nil {
				http.Error(w, fmt.Sprintf("failed to rewrite links: %v", err), 500)
				return
			}
		}
		w.Header().Set("Content-Type", mime.TypeByExtension(path.Ext(urlFileName)))
		if _, err := io.Copy(w, &buf); err != nil {
			http.Error(w, err.Error(), 500)
		}
	})
	log.Println("starting http server")
	// ReadHeaderTimeout guards against slowloris-style clients without the
	// risk a full WriteTimeout would pose to long archive transfers.
	srv := &http.Server{
		Addr:              ":5000",
		Handler:           mux,
		ReadHeaderTimeout: 10 * time.Second,
	}
	if err := srv.ListenAndServe(); err != nil {
		log.Fatalf("failed to start http server: %v\n", err)
	}
}