From 386536de33f5aa72024e11b415246b92a23e6ba0 Mon Sep 17 00:00:00 2001 From: rusk Date: Tue, 22 Dec 2020 02:38:21 -0800 Subject: [PATCH] its very clean now --- LICENSE | 1 + main.go | 53 +++++++++++++++++++++++++---------------------------- readme.md | 11 +++++++---- 3 files changed, 33 insertions(+), 32 deletions(-) create mode 100644 LICENSE diff --git a/LICENSE b/LICENSE new file mode 100644 index 0000000..317ec58 --- /dev/null +++ b/LICENSE @@ -0,0 +1 @@ +Goscrape is in the public domain. diff --git a/main.go b/main.go index 599a908..d1c09e0 100644 --- a/main.go +++ b/main.go @@ -30,39 +30,38 @@ func main() { getopt.Usage() return } - + args := getopt.Args() if len(args) < 1 { getopt.Usage() return } - + urls := strings.Split(args[0], " ") origDir, _ := os.Getwd() Gochan := godesu.New() - // loop through all urls - for urlNum, url := range urls { + for urlNum, url := range urls { // loop through all urls purl := strings.Split(url, "/") ThreadNum, err := strconv.Atoi(purl[5]) if err != nil { - fmt.Printf("Could not convert thread number to int! Make sure the URL is correct. | %v\n", err) + fmt.Printf("Could not convert thread number to int! Make sure the URL is correct. %v\n", err) return } - + err, Thread := Gochan.Board(purl[3]).GetThread(ThreadNum) if err != nil { - fmt.Printf("Could not fetch thread! | %v\n", err) + fmt.Printf("Could not fetch thread! %v\n", err) return } - + images := Thread.Images() finishStateChan := make(chan finishState, len(images)) // make the download channel with proper buffer size if *customDownloadDir != "" { if err := os.Chdir(*customDownloadDir + "/"); err != nil { if err := os.MkdirAll(*customDownloadDir+"/", os.ModePerm); err != nil { - fmt.Printf("Cannot create directory! | %v\n", err) + fmt.Printf("Cannot create directory! %v\n", err) return } else { os.Chdir(*customDownloadDir + "/") @@ -70,56 +69,54 @@ func main() { } } else { if err := os.MkdirAll(purl[3]+"/"+purl[5], os.ModePerm); err != nil { - fmt.Printf("Cannot create directory! | %v\n", err) + fmt.Printf("Cannot create directory! %v\n", err) return } os.Chdir(purl[3] + "/" + purl[5]) } - fmt.Printf("Downloading '%v' | %v of %v\n", url, urlNum+1, len(urls)) + fmt.Printf("Downloading '%v' %v of %v\n", url, urlNum+1, len(urls)) - // get the images downloading - for _, image := range images { + for _, image := range images { // get the images downloading go func(image godesu.Image) { - var filename string + var fs finishState if *useOrigFilename { - filename = image.OriginalFilename + fs.filename = image.OriginalFilename } else { - filename = image.Filename + image.Extension + fs.filename = image.Filename + image.Extension } - fs := finishState{filename: filename} - if _, err := os.Stat(filename); err == nil { - fs.err = fmt.Errorf("'%v' exists! Skipping...", filename) + if _, err := os.Stat(fs.filename); err == nil { + fs.err = fmt.Errorf("'%v' exists! Skipping...", fs.filename) finishStateChan <- fs return } resp, err := http.Get(image.URL) if err != nil { - fs.err = fmt.Errorf("Error downloading '%v'! | %v", image.URL, err) + fs.err = fmt.Errorf("Error downloading '%v'! %v", image.URL, err) finishStateChan <- fs return } else if resp.StatusCode != http.StatusOK { - fs.err = fmt.Errorf("Error downloading '%v'! Http status not ok: %v", image.URL, resp.StatusCode) + fs.err = fmt.Errorf("Error downloading '%v'! Http status not ok: %s", image.URL, resp.StatusCode) finishStateChan <- fs return } defer resp.Body.Close() - tmpFilename := filename + ".part" + tmpFilename := fs.filename + ".part" file, err := os.Create(tmpFilename) if err != nil { - fs.err = fmt.Errorf("Cannot create '%v'! | %v", tmpFilename, err) + fs.err = fmt.Errorf("Cannot create '%v'! %v", tmpFilename, err) finishStateChan <- fs return } io.Copy(file, resp.Body) - if err := os.Rename(tmpFilename, filename); err != nil { - fs.err = fmt.Errorf("Unable to rename '%v' to '%v'! | %v", tmpFilename, filename, err) + if err := os.Rename(tmpFilename, fs.filename); err != nil { + fs.err = fmt.Errorf("Unable to rename '%v' to '%v'! %v", tmpFilename, fs.filename, err) } finishStateChan <- fs @@ -127,12 +124,12 @@ func main() { }(image) } - for i := 0; i < len(images); i++ { + for i := 0; i < len(images); i++ { // watch for images to finish fs := <-finishStateChan if fs.err != nil { - fmt.Printf("%v | %v of %v\n", fs.err, i+1, len(images)) + fmt.Printf("%v %v of %v\n", fs.err, i+1, len(images)) } else { - fmt.Printf("Finished downloading '%v' | %v of %v\n", fs.filename, i+1, len(images)) + fmt.Printf("Finished downloading '%v' %v of %v\n", fs.filename, i+1, len(images)) } } diff --git a/readme.md b/readme.md index d3d44d0..e64f026 100644 --- a/readme.md +++ b/readme.md @@ -5,7 +5,13 @@ Goscrape is a very simple and efficient 4chan media scraper written in go that s `go get github.com/lordrusk/goscrape` ## How to use -You can simply run goscrape with a link to a thread and it'll download all the images. For multiple links, put in quotes. Add the `-o` option to download with original filenames, and use `-c` to set a custom directory. +`-h` for help menu. + +`-o` for original filenames. + +`-c` to set a custom directory. + +Put in quotes for multiple threads. ## Features * Goscrape is upwards of 4x faster then other scrapers, goscrape does this by using go's concurrency to download multiple images at the same time, taking advantage of more bandwidth. You won't find download speeds like this anywhere else. @@ -14,6 +20,3 @@ You can simply run goscrape with a link to a thread and it'll download all the i ## Why? Because *all* 4chan scrapers I've seen and used were written in python, I dislike python and wanted to make something in GO. - -### License -Goscrape is in the public domain.