package main import ( "archive/tar" "bytes" "fmt" "io" "log" "mime" "net/http" "net/url" "os" "path" "strings" "github.com/PuerkitoBio/goquery" "github.com/google/uuid" ) func fetchUrl(url string) ([]byte, error) { resp, err := http.Get(url) if err != nil { return nil, fmt.Errorf("failed to fetch url: %v", err) } defer resp.Body.Close() data, err := io.ReadAll(resp.Body) if err != nil { return nil, fmt.Errorf("failed to read response: %v", err) } return data, nil } type TarFile struct { Name string Body []byte } func createTarball(files []TarFile) (bytes.Buffer, error) { var buf bytes.Buffer tw := tar.NewWriter(&buf) for _, file := range files { hdr := &tar.Header{ Name: file.Name, Mode: 0644, Size: int64(len(file.Body)), } if err := tw.WriteHeader(hdr); err != nil { return buf, fmt.Errorf("failed to write tar header: %v", err) } // TODO: sometimes this fails with "write too long" as file written is larger than header size says it is // don't know why yet, need to sleep // good luck, future me if _, err := tw.Write(file.Body); err != nil { return buf, fmt.Errorf("failed to write file to tarball: %v", err) } } if err := tw.Close(); err != nil { return buf, fmt.Errorf("failed to close tar writer: %v", err) } return buf, nil } func findAllLinks(html []byte, baseUrl string) ([]TarFile, error) { doc, err := goquery.NewDocumentFromReader(bytes.NewReader(html)) if err != nil { return nil, fmt.Errorf("failed to create document from reader: %v", err) } files := []TarFile{} doc.Find("head link").Each(func(i int, s *goquery.Selection) { url := s.AttrOr("href", "") if url == "" { return } fileName := url if strings.HasPrefix(url, "/") { url = baseUrl + url } data, err := fetchUrl(url) if err != nil { log.Printf("failed to fetch url: %v\n", err) return } files = append(files, TarFile{ Name: fileName, Body: data, }) }) doc.Find("script").Each(func(i int, s *goquery.Selection) { src := s.AttrOr("src", "") if src == "" { return } fileName := src if strings.HasPrefix(src, "/") { src = baseUrl + src } data, err := fetchUrl(src) if err != nil { log.Printf("failed to fetch url: %v\n", err) return } files = append(files, TarFile{ Name: fileName, Body: data, }) }) doc.Find("img").Each(func(i int, s *goquery.Selection) { src := s.AttrOr("src", "") if src == "" { return } fileName := src if strings.HasPrefix(src, "/") { src = baseUrl + src } data, err := fetchUrl(src) if err != nil { log.Printf("failed to fetch url: %v\n", err) return } files = append(files, TarFile{ Name: fileName, Body: data, }) }) return files, nil } type ReplaceFile struct { Name string Url string } func replaceLinks(buf *bytes.Buffer, files []ReplaceFile, baseUrl string) error { doc, err := goquery.NewDocumentFromReader(buf) if err != nil { return fmt.Errorf("failed to create document from buffer: %v", err) } for _, file := range files { ext := path.Ext(file.Name) if ext == ".js" { s := doc.Find(fmt.Sprintf(`script[src="%s"]`, file.Name)) if s != nil { src := s.AttrOr("src", "") if !strings.HasPrefix(src, "/") { continue } s.SetAttr("src", baseUrl+src) } } else if ext == ".png" || ext == ".jpeg" || ext == ".jpg" || ext == ".svg" || ext == ".webp" { s := doc.Find(fmt.Sprintf(`img[src="%s"]`, file.Name)) if s != nil { src := s.AttrOr("src", "") if !strings.HasPrefix(src, "/") { continue } s.SetAttr("src", baseUrl+src) } } else { s := doc.Find(fmt.Sprintf(`head link[href="%s"]`, file.Name)) if s != nil { href := s.AttrOr("href", "") if !strings.HasPrefix(href, "/") { continue } s.SetAttr("href", baseUrl+href) } } } h, err := doc.Html() if err != nil { return fmt.Errorf("failed to get new html: %v", err) } if _, err := io.Copy(buf, bytes.NewReader([]byte(h))); err != nil { return fmt.Errorf("failed to overwrite buffer with new html: %v", err) } return nil } func main() { mux := http.NewServeMux() mux.HandleFunc("GET /", func(w http.ResponseWriter, r *http.Request) { siteUrl := r.URL.Query().Get("url") if siteUrl == "" { http.Error(w, "url parameter is required", 400) return } html, err := fetchUrl(siteUrl) if err != nil { http.Error(w, fmt.Sprintf("failed to fetch html: %v", err), 500) return } files := []TarFile{ {Name: "index.html", Body: html}, } u, err := url.Parse(siteUrl) if err != nil { http.Error(w, fmt.Sprintf("failed to parse url: %v", err), 500) return } baseSiteUrl := fmt.Sprintf("%s://%s", u.Scheme, u.Host) otherFiles, err := findAllLinks(html, baseSiteUrl) if err != nil { http.Error(w, fmt.Sprintf("failed to find & download other files: %v", err), 500) return } files = append(files, otherFiles...) buf, err := createTarball(files) if err != nil { http.Error(w, fmt.Sprintf("failed to create tarball: %v", err), 500) return } uid, err := uuid.NewV7() if err != nil { http.Error(w, fmt.Sprintf("failed to generate uuid: %v", err), 500) return } fName := fmt.Sprintf("%s.tar", uid.String()) f, err := os.OpenFile(fName, os.O_CREATE|os.O_RDWR, 0644) if err != nil { http.Error(w, fmt.Sprintf("failed to open tar file: %v", err), 500) return } defer f.Close() if _, err := f.Write(buf.Bytes()); err != nil { http.Error(w, fmt.Sprintf("failed to write data to tar file: %v", err), 500) return } fmt.Fprintf(w, "%s", uid.String()) }) mux.HandleFunc("GET /{uuid}/", func(w http.ResponseWriter, r *http.Request) { uid := r.PathValue("uuid") fName := fmt.Sprintf("%s.tar", uid) f, err := os.OpenFile(fName, os.O_RDONLY, 0644) if err != nil { http.Error(w, fmt.Sprintf("failed to open tarball: %v", err), 404) return } defer f.Close() urlFileName := "/" + strings.Join(strings.Split(r.URL.Path, "/")[2:], "/") if urlFileName == "/" { urlFileName = "index.html" } files := []ReplaceFile{} var buf bytes.Buffer tr := tar.NewReader(f) for { hdr, err := tr.Next() if err == io.EOF { break } if err != nil { http.Error(w, fmt.Sprintf("failed to read tarball: %v", err), 500) return } if hdr.Name == urlFileName { if _, err := io.Copy(&buf, tr); err != nil { return } } files = append(files, ReplaceFile{ Name: hdr.Name, Url: hdr.Name, }) } if urlFileName == "index.html" { replaceLinks(&buf, files, "/"+uid) } w.Header().Set("Content-Type", mime.TypeByExtension(path.Ext(urlFileName))) if _, err := io.Copy(w, &buf); err != nil { http.Error(w, err.Error(), 500) } }) log.Println("starting http server") if err := http.ListenAndServe(":5000", mux); err != nil { log.Fatalf("failed to start http server: %v\n", err) } }