// Command main archives a web page and the assets it references into a
// tarball, then serves the archived copy back over HTTP with its asset
// links rewritten to point at this server.
package main
|
|
|
|
import (
	"archive/tar"
	"bytes"
	"fmt"
	"io"
	"log"
	"mime"
	"net/http"
	"net/url"
	"os"
	"path"
	"strings"
	"time"

	"github.com/PuerkitoBio/goquery"
	"github.com/google/uuid"
)
|
|
|
|
// fetchUrl performs an HTTP GET against rawURL and returns the full
// response body.
//
// It returns an error when the request fails, when the server answers
// with a non-2xx status, or when the body cannot be read. The parameter
// was renamed from url to avoid shadowing the imported net/url package.
func fetchUrl(rawURL string) ([]byte, error) {
	resp, err := http.Get(rawURL)
	if err != nil {
		return nil, fmt.Errorf("failed to fetch url: %w", err)
	}
	defer resp.Body.Close()

	// Previously any status (404/500/...) was treated as success, so
	// server error pages were silently archived as if they were assets.
	if resp.StatusCode < 200 || resp.StatusCode > 299 {
		return nil, fmt.Errorf("failed to fetch url %q: unexpected status %s", rawURL, resp.Status)
	}

	data, err := io.ReadAll(resp.Body)
	if err != nil {
		return nil, fmt.Errorf("failed to read response: %w", err)
	}

	return data, nil
}
|
|
|
|
// TarFile is one entry destined for a tar archive: the file's name
// inside the archive and its raw contents.
type TarFile struct {
	Name string
	Body []byte
}

// createTarball writes files into an in-memory tar archive and returns
// the resulting buffer. Each entry is stored as a regular file with
// mode 0644 whose Size is taken from len(file.Body). On error the
// returned buffer is partially written and must not be used.
func createTarball(files []TarFile) (bytes.Buffer, error) {
	var buf bytes.Buffer
	tw := tar.NewWriter(&buf)

	for _, file := range files {
		hdr := &tar.Header{
			Name: file.Name,
			Mode: 0644,
			Size: int64(len(file.Body)),
		}
		if err := tw.WriteHeader(hdr); err != nil {
			return buf, fmt.Errorf("failed to write tar header: %w", err)
		}
		// NOTE(review): re the old "write too long" TODO — tar returns
		// ErrWriteTooLong when more bytes are written than hdr.Size
		// declared. Size is computed from len(file.Body) immediately
		// above, so that can only happen if file.Body grows between the
		// two calls (e.g. a shared slice mutated concurrently). Audit
		// the callers that build []TarFile — TODO confirm.
		if _, err := tw.Write(file.Body); err != nil {
			return buf, fmt.Errorf("failed to write file to tarball: %w", err)
		}
	}
	if err := tw.Close(); err != nil {
		return buf, fmt.Errorf("failed to close tar writer: %w", err)
	}

	return buf, nil
}
|
|
|
|
func findAllLinks(html []byte, baseUrl string) ([]TarFile, error) {
|
|
doc, err := goquery.NewDocumentFromReader(bytes.NewReader(html))
|
|
if err != nil {
|
|
return nil, fmt.Errorf("failed to create document from reader: %v", err)
|
|
}
|
|
|
|
files := []TarFile{}
|
|
|
|
doc.Find("head link").Each(func(i int, s *goquery.Selection) {
|
|
url := s.AttrOr("href", "")
|
|
if url == "" {
|
|
return
|
|
}
|
|
fileName := url
|
|
|
|
if strings.HasPrefix(url, "/") {
|
|
url = baseUrl + url
|
|
}
|
|
|
|
data, err := fetchUrl(url)
|
|
if err != nil {
|
|
log.Printf("failed to fetch url: %v\n", err)
|
|
return
|
|
}
|
|
|
|
files = append(files, TarFile{
|
|
Name: fileName,
|
|
Body: data,
|
|
})
|
|
})
|
|
|
|
doc.Find("script").Each(func(i int, s *goquery.Selection) {
|
|
src := s.AttrOr("src", "")
|
|
if src == "" {
|
|
return
|
|
}
|
|
fileName := src
|
|
|
|
if strings.HasPrefix(src, "/") {
|
|
src = baseUrl + src
|
|
}
|
|
|
|
data, err := fetchUrl(src)
|
|
if err != nil {
|
|
log.Printf("failed to fetch url: %v\n", err)
|
|
return
|
|
}
|
|
|
|
files = append(files, TarFile{
|
|
Name: fileName,
|
|
Body: data,
|
|
})
|
|
})
|
|
|
|
doc.Find("img").Each(func(i int, s *goquery.Selection) {
|
|
src := s.AttrOr("src", "")
|
|
if src == "" {
|
|
return
|
|
}
|
|
fileName := src
|
|
|
|
if strings.HasPrefix(src, "/") {
|
|
src = baseUrl + src
|
|
}
|
|
|
|
data, err := fetchUrl(src)
|
|
if err != nil {
|
|
log.Printf("failed to fetch url: %v\n", err)
|
|
return
|
|
}
|
|
|
|
files = append(files, TarFile{
|
|
Name: fileName,
|
|
Body: data,
|
|
})
|
|
})
|
|
|
|
return files, nil
|
|
}
|
|
|
|
// ReplaceFile names an archived file whose references inside the served
// index.html should be rewritten to point back through this server.
type ReplaceFile struct {
	// Name is the file's path exactly as stored in the tarball; it is
	// used verbatim to build the CSS attribute selector that matches
	// the file's <script>/<img>/<link> tag.
	Name string
	// Url is always set to the same value as Name by the caller and is
	// never read by replaceLinks — presumably reserved for future use.
	Url string
}
|
|
|
|
func replaceLinks(buf *bytes.Buffer, files []ReplaceFile, baseUrl string) error {
|
|
doc, err := goquery.NewDocumentFromReader(buf)
|
|
if err != nil {
|
|
return fmt.Errorf("failed to create document from buffer: %v", err)
|
|
}
|
|
|
|
for _, file := range files {
|
|
ext := path.Ext(file.Name)
|
|
if ext == ".js" {
|
|
s := doc.Find(fmt.Sprintf(`script[src="%s"]`, file.Name))
|
|
if s != nil {
|
|
src := s.AttrOr("src", "")
|
|
if !strings.HasPrefix(src, "/") {
|
|
continue
|
|
}
|
|
s.SetAttr("src", baseUrl+src)
|
|
}
|
|
} else if ext == ".png" || ext == ".jpeg" || ext == ".jpg" || ext == ".svg" || ext == ".webp" {
|
|
s := doc.Find(fmt.Sprintf(`img[src="%s"]`, file.Name))
|
|
if s != nil {
|
|
src := s.AttrOr("src", "")
|
|
if !strings.HasPrefix(src, "/") {
|
|
continue
|
|
}
|
|
s.SetAttr("src", baseUrl+src)
|
|
}
|
|
} else {
|
|
s := doc.Find(fmt.Sprintf(`head link[href="%s"]`, file.Name))
|
|
if s != nil {
|
|
href := s.AttrOr("href", "")
|
|
if !strings.HasPrefix(href, "/") {
|
|
continue
|
|
}
|
|
s.SetAttr("href", baseUrl+href)
|
|
}
|
|
}
|
|
}
|
|
|
|
h, err := doc.Html()
|
|
if err != nil {
|
|
return fmt.Errorf("failed to get new html: %v", err)
|
|
}
|
|
|
|
if _, err := io.Copy(buf, bytes.NewReader([]byte(h))); err != nil {
|
|
return fmt.Errorf("failed to overwrite buffer with new html: %v", err)
|
|
}
|
|
|
|
return nil
|
|
}
|
|
|
|
func main() {
|
|
mux := http.NewServeMux()
|
|
|
|
mux.HandleFunc("GET /", func(w http.ResponseWriter, r *http.Request) {
|
|
siteUrl := r.URL.Query().Get("url")
|
|
if siteUrl == "" {
|
|
http.Error(w, "url parameter is required", 400)
|
|
return
|
|
}
|
|
|
|
html, err := fetchUrl(siteUrl)
|
|
if err != nil {
|
|
http.Error(w, fmt.Sprintf("failed to fetch html: %v", err), 500)
|
|
return
|
|
}
|
|
|
|
files := []TarFile{
|
|
{Name: "index.html", Body: html},
|
|
}
|
|
|
|
u, err := url.Parse(siteUrl)
|
|
if err != nil {
|
|
http.Error(w, fmt.Sprintf("failed to parse url: %v", err), 500)
|
|
return
|
|
}
|
|
baseSiteUrl := fmt.Sprintf("%s://%s", u.Scheme, u.Host)
|
|
|
|
otherFiles, err := findAllLinks(html, baseSiteUrl)
|
|
if err != nil {
|
|
http.Error(w, fmt.Sprintf("failed to find & download other files: %v", err), 500)
|
|
return
|
|
}
|
|
files = append(files, otherFiles...)
|
|
|
|
buf, err := createTarball(files)
|
|
if err != nil {
|
|
http.Error(w, fmt.Sprintf("failed to create tarball: %v", err), 500)
|
|
return
|
|
}
|
|
|
|
uid, err := uuid.NewV7()
|
|
if err != nil {
|
|
http.Error(w, fmt.Sprintf("failed to generate uuid: %v", err), 500)
|
|
return
|
|
}
|
|
|
|
fName := fmt.Sprintf("%s.tar", uid.String())
|
|
f, err := os.OpenFile(fName, os.O_CREATE|os.O_RDWR, 0644)
|
|
if err != nil {
|
|
http.Error(w, fmt.Sprintf("failed to open tar file: %v", err), 500)
|
|
return
|
|
}
|
|
defer f.Close()
|
|
|
|
if _, err := f.Write(buf.Bytes()); err != nil {
|
|
http.Error(w, fmt.Sprintf("failed to write data to tar file: %v", err), 500)
|
|
return
|
|
}
|
|
|
|
fmt.Fprintf(w, "%s", uid.String())
|
|
})
|
|
|
|
mux.HandleFunc("GET /{uuid}/", func(w http.ResponseWriter, r *http.Request) {
|
|
uid := r.PathValue("uuid")
|
|
|
|
fName := fmt.Sprintf("%s.tar", uid)
|
|
f, err := os.OpenFile(fName, os.O_RDONLY, 0644)
|
|
if err != nil {
|
|
http.Error(w, fmt.Sprintf("failed to open tarball: %v", err), 404)
|
|
return
|
|
}
|
|
defer f.Close()
|
|
|
|
urlFileName := "/" + strings.Join(strings.Split(r.URL.Path, "/")[2:], "/")
|
|
if urlFileName == "/" {
|
|
urlFileName = "index.html"
|
|
}
|
|
|
|
files := []ReplaceFile{}
|
|
|
|
var buf bytes.Buffer
|
|
|
|
tr := tar.NewReader(f)
|
|
for {
|
|
hdr, err := tr.Next()
|
|
if err == io.EOF {
|
|
break
|
|
}
|
|
if err != nil {
|
|
http.Error(w, fmt.Sprintf("failed to read tarball: %v", err), 500)
|
|
return
|
|
}
|
|
|
|
if hdr.Name == urlFileName {
|
|
if _, err := io.Copy(&buf, tr); err != nil {
|
|
return
|
|
}
|
|
}
|
|
|
|
files = append(files, ReplaceFile{
|
|
Name: hdr.Name,
|
|
Url: hdr.Name,
|
|
})
|
|
}
|
|
|
|
if urlFileName == "index.html" {
|
|
replaceLinks(&buf, files, "/"+uid)
|
|
}
|
|
|
|
w.Header().Set("Content-Type", mime.TypeByExtension(path.Ext(urlFileName)))
|
|
if _, err := io.Copy(w, &buf); err != nil {
|
|
http.Error(w, err.Error(), 500)
|
|
}
|
|
})
|
|
|
|
log.Println("starting http server")
|
|
if err := http.ListenAndServe(":5000", mux); err != nil {
|
|
log.Fatalf("failed to start http server: %v\n", err)
|
|
}
|
|
}
|