
Commit

its very clean now
LordRusk committed Dec 22, 2020
1 parent 14d3065 commit 386536d
Showing 3 changed files with 33 additions and 32 deletions.
1 change: 1 addition & 0 deletions LICENSE
@@ -0,0 +1 @@
+Goscrape is in the public domain.
53 changes: 25 additions & 28 deletions main.go
@@ -30,109 +30,106 @@ func main() {
        getopt.Usage()
        return
    }

    args := getopt.Args()
    if len(args) < 1 {
        getopt.Usage()
        return
    }

    urls := strings.Split(args[0], " ")
    origDir, _ := os.Getwd()
    Gochan := godesu.New()

-    // loop through all urls
-    for urlNum, url := range urls {
+    for urlNum, url := range urls { // loop through all urls
        purl := strings.Split(url, "/")
        ThreadNum, err := strconv.Atoi(purl[5])
        if err != nil {
-            fmt.Printf("Could not convert thread number to int! Make sure the URL is correct. | %v\n", err)
+            fmt.Printf("Could not convert thread number to int! Make sure the URL is correct. %v\n", err)
            return
        }

        err, Thread := Gochan.Board(purl[3]).GetThread(ThreadNum)
        if err != nil {
-            fmt.Printf("Could not fetch thread! | %v\n", err)
+            fmt.Printf("Could not fetch thread! %v\n", err)
            return
        }

        images := Thread.Images()
        finishStateChan := make(chan finishState, len(images)) // make the download channel with proper buffer size

        if *customDownloadDir != "" {
            if err := os.Chdir(*customDownloadDir + "/"); err != nil {
                if err := os.MkdirAll(*customDownloadDir+"/", os.ModePerm); err != nil {
-                    fmt.Printf("Cannot create directory! | %v\n", err)
+                    fmt.Printf("Cannot create directory! %v\n", err)
                    return
                } else {
                    os.Chdir(*customDownloadDir + "/")
                }
            }
        } else {
            if err := os.MkdirAll(purl[3]+"/"+purl[5], os.ModePerm); err != nil {
-                fmt.Printf("Cannot create directory! | %v\n", err)
+                fmt.Printf("Cannot create directory! %v\n", err)
                return
            }
            os.Chdir(purl[3] + "/" + purl[5])
        }

-        fmt.Printf("Downloading '%v' | %v of %v\n", url, urlNum+1, len(urls))
+        fmt.Printf("Downloading '%v' %v of %v\n", url, urlNum+1, len(urls))

-        // get the images downloading
-        for _, image := range images {
+        for _, image := range images { // get the images downloading
            go func(image godesu.Image) {
-                var filename string
+                var fs finishState
                if *useOrigFilename {
-                    filename = image.OriginalFilename
+                    fs.filename = image.OriginalFilename
                } else {
-                    filename = image.Filename + image.Extension
+                    fs.filename = image.Filename + image.Extension
                }
-                fs := finishState{filename: filename}

-                if _, err := os.Stat(filename); err == nil {
-                    fs.err = fmt.Errorf("'%v' exists! Skipping...", filename)
+                if _, err := os.Stat(fs.filename); err == nil {
+                    fs.err = fmt.Errorf("'%v' exists! Skipping...", fs.filename)
                    finishStateChan <- fs
                    return
                }

                resp, err := http.Get(image.URL)
                if err != nil {
-                    fs.err = fmt.Errorf("Error downloading '%v'! | %v", image.URL, err)
+                    fs.err = fmt.Errorf("Error downloading '%v'! %v", image.URL, err)
                    finishStateChan <- fs
                    return
                } else if resp.StatusCode != http.StatusOK {
-                    fs.err = fmt.Errorf("Error downloading '%v'! Http status not ok: %v", image.URL, resp.StatusCode)
+                    fs.err = fmt.Errorf("Error downloading '%v'! Http status not ok: %s", image.URL, resp.StatusCode)
                    finishStateChan <- fs
                    return
                }
                defer resp.Body.Close()

-                tmpFilename := filename + ".part"
+                tmpFilename := fs.filename + ".part"

                file, err := os.Create(tmpFilename)
                if err != nil {
-                    fs.err = fmt.Errorf("Cannot create '%v'! | %v", tmpFilename, err)
+                    fs.err = fmt.Errorf("Cannot create '%v'! %v", tmpFilename, err)
                    finishStateChan <- fs
                    return
                }

                io.Copy(file, resp.Body)

-                if err := os.Rename(tmpFilename, filename); err != nil {
-                    fs.err = fmt.Errorf("Unable to rename '%v' to '%v'! | %v", tmpFilename, filename, err)
+                if err := os.Rename(tmpFilename, fs.filename); err != nil {
+                    fs.err = fmt.Errorf("Unable to rename '%v' to '%v'! %v", tmpFilename, fs.filename, err)
                }

                finishStateChan <- fs
                return
            }(image)
        }

-        for i := 0; i < len(images); i++ {
+        for i := 0; i < len(images); i++ { // watch for images to finish
            fs := <-finishStateChan
            if fs.err != nil {
-                fmt.Printf("%v | %v of %v\n", fs.err, i+1, len(images))
+                fmt.Printf("%v %v of %v\n", fs.err, i+1, len(images))
            } else {
-                fmt.Printf("Finished downloading '%v' | %v of %v\n", fs.filename, i+1, len(images))
+                fmt.Printf("Finished downloading '%v' %v of %v\n", fs.filename, i+1, len(images))
            }
        }

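The refactor above funnels every download through a finishState value on a buffered channel: each goroutine sends exactly one finishState, and the counting loop at the end of the hunk receives one result per image, which is what keeps the program from finishing before the downloads do. The finishState type is declared elsewhere in main.go and is not shown in this hunk; the block below is only a minimal, self-contained sketch of the same fan-out/fan-in pattern, assuming the struct carries nothing more than a filename and an error.

```go
package main

import "fmt"

// finishState is not shown in the diff; a minimal definition consistent
// with how main.go uses it (fs.filename, fs.err) would be:
type finishState struct {
	filename string
	err      error
}

func main() {
	images := []string{"a.png", "b.jpg", "c.gif"} // stand-ins for thread images

	// Buffer the channel to the number of downloads so no worker blocks
	// on send, mirroring make(chan finishState, len(images)).
	finishStateChan := make(chan finishState, len(images))

	for _, name := range images {
		go func(name string) {
			// A real worker would download the file here and record any failure.
			finishStateChan <- finishState{filename: name}
		}(name)
	}

	// Collect exactly one result per download, as the counting loop does.
	for i := 0; i < len(images); i++ {
		fs := <-finishStateChan
		if fs.err != nil {
			fmt.Printf("%v %v of %v\n", fs.err, i+1, len(images))
		} else {
			fmt.Printf("Finished downloading '%v' %v of %v\n", fs.filename, i+1, len(images))
		}
	}
}
```

The `.part` temporary file plus os.Rename step in the hunk serves a related purpose: a half-finished download never sits at its final name, and only a completed file is renamed into place.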
11 changes: 7 additions & 4 deletions readme.md
@@ -5,7 +5,13 @@ Goscrape is a very simple and efficient 4chan media scraper written in go that s
`go get github.com/lordrusk/goscrape`

## How to use
-You can simply run goscrape with a link to a thread and it'll download all the images. For multiple links, put in quotes. Add the `-o` option to download with original filenames, and use `-c` to set a custom directory.
+`-h` for help menu.
+
+`-o` for original filenames.
+
+`-c` to set a custom directory.
+
+Put in quotes for multiple threads.

## Features
* Goscrape is upwards of 4x faster then other scrapers, goscrape does this by using go's concurrency to download multiple images at the same time, taking advantage of more bandwidth. You won't find download speeds like this anywhere else.
@@ -14,6 +20,3 @@ You can simply run goscrape with a link to a thread and it'll download all the i

## Why?
Because *all* 4chan scrapers I've seen and used were written in python, I dislike python and wanted to make something in GO.
-
-### License
-Goscrape is in the public domain.
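Together with the URL handling in main.go (purl[3] is the board, purl[5] is the thread number, and several URLs arrive as one space-separated argument through strings.Split(args[0], " ")), the flag list above suggests an invocation roughly like the following, where the download directory and both thread URLs are made up for illustration and the exact flag syntax depends on the getopt package in use:

`goscrape -o -c ~/4chan "https://boards.4chan.org/g/thread/12345678 https://boards.4chan.org/wg/thread/23456789"`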
