Skip to content

Commit

Permalink
remove the dreaded skrape.it, can't even produce a workable version w…
Browse files Browse the repository at this point in the history
…ithin 1hour of build time, coming back to the good ol jsoup
  • Loading branch information
FunkyMuse committed Aug 7, 2021
1 parent a2e3d31 commit bde2f4b
Show file tree
Hide file tree
Showing 7 changed files with 83 additions and 71 deletions.
2 changes: 1 addition & 1 deletion book/skraper/build.gradle
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@ dependencies {


//jsoup
implementation "it.skrape:skrapeit:$skrapeit"
api "org.jsoup:jsoup:$jsoup"

implementation "com.github.FunkyMuse.KAHelpers:common:$KAHelpers"
implementation project(path: ':coroutines:dispatchers')
Expand Down
125 changes: 72 additions & 53 deletions book/skraper/src/main/java/com/funkymuse/aurora/skraper/BookScraper.kt
Original file line number Diff line number Diff line change
Expand Up @@ -4,11 +4,10 @@ import com.crazylegend.common.tryOrNull
import com.funkymuse.aurora.bookmodel.Book
import com.funkymuse.aurora.extensions.removeBrackets
import com.funkymuse.aurora.serverconstants.*
import it.skrape.core.htmlDocument
import it.skrape.fetcher.HttpFetcher
import it.skrape.fetcher.response
import it.skrape.fetcher.skrape
import it.skrape.selects.DocElement
import org.jsoup.Connection
import org.jsoup.Jsoup
import org.jsoup.nodes.Document
import org.jsoup.nodes.Element
import javax.inject.Inject
import javax.inject.Singleton

Expand All @@ -18,66 +17,86 @@ import javax.inject.Singleton
@Singleton
class BookScraper @Inject constructor() {

private fun List<DocElement>.toBook(): Book? {
private fun List<Element>.toBook(): Book? {
val id = tryOrNull {
this[2].eachLink.values.firstOrNull()?.substringAfter("md5=")
this[2].select("a").firstOrNull()?.attr("href")?.substringAfter("md5=")
} ?: return null
val extension = tryOrNull { this[33].text }?.uppercase()
val size = tryOrNull { this[31].text }?.substringBefore("(")?.trim()
val pages = tryOrNull { this[21].text }?.removeBrackets()?.substringBefore(" ")
val image = tryOrNull { this[0].eachImage.values.firstOrNull() }
val title = tryOrNull { this[2].text }
val author = tryOrNull { this[5].text }
val year = tryOrNull { this[15].text }
val extension = tryOrNull { this[33].text() }?.uppercase()
val size = tryOrNull { this[31].text() }?.substringBefore("(")?.trim()
val pages = tryOrNull { this[21].text() }?.removeBrackets()?.substringBefore(" ")
val image = tryOrNull {
this[0].select("img").firstOrNull()?.attr("src")
}
val title = tryOrNull { this[2].text() }
val author = tryOrNull { this[5].text() }
val year = tryOrNull { this[15].text() }
return Book(image, title, author, id, extension, pages, size, year)
}

fun generateSearchDataUrl(
page: Int, searchQuery: String, sortQuery: String, sortType: String,
searchInFieldsPosition: Int, maskWord: Boolean
): String = "$SEARCH_BASE_URL?$REQ_CONST=${
searchQuery.replace(
" ",
"+"
)
}&$SORT_QUERY=$sortQuery&$VIEW_QUERY=$VIEW_QUERY_PARAM&$RES_CONST=$PAGE_SIZE&" +
"&$COLUM_QUERY=${getFieldParamByPosition(searchInFieldsPosition)}&$SORT_TYPE=$sortType&" +
"$SEARCH_WITH_MASK=${if (maskWord) SEARCH_WITH_MASK_YES else SEARCH_WITH_MASK_NO}&" +
"$PAGE_CONST=$page"

searchInFieldsPosition: Int, maskWord: Boolean, connection: Connection
): Connection {
connection.apply {
timeout(DEFAULT_API_TIMEOUT)
data(REQ_CONST, searchQuery)
data(VIEW_QUERY, VIEW_QUERY_PARAM)
data(COLUM_QUERY, getFieldParamByPosition(searchInFieldsPosition))
data(SEARCH_WITH_MASK, if (maskWord) SEARCH_WITH_MASK_YES else SEARCH_WITH_MASK_NO)
data(RES_CONST, PAGE_SIZE)
data(SORT_QUERY, sortQuery)
data(SORT_TYPE, sortType)
data(PAGE_CONST, page.toString())
}
return connection
}

fun generateLatestBooksUrl(page: Int, sortQuery: String, sortType: String): String =
"$SEARCH_BASE_URL?$SORT_QUERY=$sortQuery&$VIEW_QUERY=$VIEW_QUERY_PARAM&$RES_CONST=$PAGE_SIZE&" +
"$LAST_MODE=$LAST_QUERY&$COLUM_QUERY=$FIELD_DEFAULT_PARAM&$SORT_TYPE=$sortType&" +
"$PAGE_CONST=$page"
/**
 * Configures [connection] with the request parameters for the latest-books listing.
 *
 * Despite the "Url" in its name, this function does not build a URL string. It sets a
 * timeout and adds key/value pairs through `data(...)` on the connection it is given,
 * then returns that same instance. The return values of the individual calls are
 * discarded, so this relies on them updating the receiver.
 *
 * @param page page number; sent as text under `PAGE_CONST`.
 * @param sortQuery value sent under `SORT_QUERY`.
 * @param sortType value sent under `SORT_TYPE`.
 * @param connection the connection to configure.
 * @return the same [connection] reference that was passed in.
 */
fun generateLatestBooksUrl(
page: Int,
sortQuery: String,
sortType: String,
connection: Connection
): Connection {
connection.apply {
// fetch() applies the same DEFAULT_API_TIMEOUT before invoking its callback,
// so this call repeats that setting when used through fetch().
timeout(DEFAULT_API_TIMEOUT)
// The data(...) calls run in this order.
// NOTE(review): presumably this is also the order the parameters are sent in — confirm before reordering.
data(SORT_QUERY, sortQuery)
data(VIEW_QUERY, VIEW_QUERY_PARAM)
data(LAST_MODE, LAST_QUERY)
data(COLUM_QUERY, FIELD_DEFAULT_PARAM)
data(RES_CONST, PAGE_SIZE)
data(SORT_TYPE, sortType)
// page is an Int and is converted to text here.
data(PAGE_CONST, page.toString())
}
return connection
}


suspend fun fetch(requestUrl: String): List<Book> =
skrape(HttpFetcher) {
request {
timeout = DEFAULT_API_TIMEOUT
url = requestUrl
}
response {
htmlDocument {
findAll("table").asSequence().drop(2).map {
/**
 * Requests the page at `SEARCH_BASE_URL` and converts the response into a list of [Book]s.
 *
 * A connection to `SEARCH_BASE_URL` is created with `DEFAULT_API_TIMEOUT`, handed to
 * [connectionCallback] so the caller can add request parameters, and then executed with
 * `get()`. The resulting [Document] is passed to [processDocument].
 *
 * This function is not `suspend`.
 * NOTE(review): `get()` presumably performs the network request synchronously on the
 * calling thread — confirm that callers invoke this off the main thread.
 *
 * @param connectionCallback receives the pre-configured connection as its receiver and
 * returns the connection that will be executed.
 * @return whatever [processDocument] produces for the fetched document.
 */
fun fetch(connectionCallback: Connection.() -> Connection): List<Book> {
// Despite its name, this local holds the Connection returned by connectionCallback.
val jsoup = Jsoup.connect(SEARCH_BASE_URL)
.timeout(DEFAULT_API_TIMEOUT)
.connectionCallback()
// Executes the request; no exception handling is done here.
val document = jsoup.get()
return processDocument(document)
}

val elementList =
tryOrNull {
it.findAll("tr").filter { it.children.size >= 2 }
}?.map { it.findAll("td") }?.flatten()
private fun processDocument(document: Document): List<Book> {
return document.select("table").asSequence().drop(2).map {

val res = if (!elementList.isNullOrEmpty()) {
elementList.mapNotNull {
elementList.toBook()
}
} else {
emptyList()
}
val elementList = tryOrNull { it.select("tr").filter { it.children().size >= 2 } }
?.map { it.select("td") }?.flatten()

res
}.flatten().toSet().toList()
val res = if (!elementList.isNullOrEmpty()) {
elementList.mapNotNull {
elementList.toBook()
}
} else {
emptyList()
}
}
}


res
}.flatten().toSet().toList()
}
}


2 changes: 1 addition & 1 deletion build.gradle
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@ buildscript {
constraint_layout = "1.0.0-beta02"
activity = "1.3.1"
navigation_compose = "1.0.0-alpha03"
skrapeit = "1.1.5"
jsoup = "1.14.1"
data_store = "1.0.0"
coreKTX = "1.7.0-alpha01"
crashy = "1.2.0"
Expand Down
3 changes: 0 additions & 3 deletions latestbooks/latestbooksdata/build.gradle
Original file line number Diff line number Diff line change
Expand Up @@ -12,9 +12,6 @@ dependencies {
implementation project(path: ':book:skraper')
implementation project(path: ':extensions')

//jsoup
implementation "it.skrape:skrapeit:$skrapeit"

implementation "androidx.lifecycle:lifecycle-viewmodel-ktx:$lifecycle"

implementation "com.github.FunkyMuse.KAHelpers:common:$KAHelpers"
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -55,8 +55,10 @@ class LatestBooksDataSource @AssistedInject constructor(
}
}

private suspend fun loadBooks(page: Int): LoadResult.Page<Int, Book> {
val list = scraper.fetch(scraper.generateLatestBooksUrl(page, sortQuery, sortType))
private fun loadBooks(page: Int): LoadResult.Page<Int, Book> {
val list = scraper.fetch {
scraper.generateLatestBooksUrl(page, sortQuery, sortType, this)
}
return if (list.isNullOrEmpty()) {
canNotLoadMoreContent()
} else {
Expand Down
3 changes: 0 additions & 3 deletions searchresult/searchresultdata/build.gradle
Original file line number Diff line number Diff line change
Expand Up @@ -12,9 +12,6 @@ dependencies {
implementation project(path: ':book:skraper')
implementation project(path: ':searchresult:searchresultdestination')

//jsoup
implementation "it.skrape:skrapeit:$skrapeit"

implementation "androidx.lifecycle:lifecycle-viewmodel-ktx:$lifecycle"

implementation "com.github.FunkyMuse.KAHelpers:common:$KAHelpers"
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,6 @@ import androidx.paging.PagingSource
import androidx.paging.PagingState
import com.crazylegend.collections.isNotNullOrEmpty
import com.crazylegend.common.isOnline
import com.crazylegend.common.tryOrNull
import com.crazylegend.retrofit.throwables.NoConnectionException
import com.funkymuse.aurora.bookmodel.Book
import com.funkymuse.aurora.dispatchers.IoDispatcher
Expand All @@ -16,10 +15,6 @@ import dagger.assisted.Assisted
import dagger.assisted.AssistedFactory
import dagger.assisted.AssistedInject
import dagger.hilt.android.qualifiers.ApplicationContext
import it.skrape.core.htmlDocument
import it.skrape.fetcher.HttpFetcher
import it.skrape.fetcher.response
import it.skrape.fetcher.skrape
import kotlinx.coroutines.CoroutineDispatcher
import kotlinx.coroutines.withContext

Expand All @@ -34,7 +29,7 @@ class SearchResultDataSource @AssistedInject constructor(
@Assisted(SEARCH_WITH_MASK) private val maskWord: Boolean,
@Assisted(SORT_TYPE) private val sortType: String,
@IoDispatcher private val dispatcher: CoroutineDispatcher,
private val scraper:BookScraper
private val scraper: BookScraper
) : PagingSource<Int, Book>() {

@AssistedFactory
Expand Down Expand Up @@ -64,8 +59,10 @@ class SearchResultDataSource @AssistedInject constructor(
}
}

private suspend fun loadBooks(page: Int): LoadResult.Page<Int, Book> {
val list = scraper.fetch(scraper.generateSearchDataUrl(page, searchQuery, sortQuery, sortType, searchInFieldsPosition, maskWord))
private fun loadBooks(page: Int): LoadResult.Page<Int, Book> {
val list = scraper.fetch{
scraper.generateSearchDataUrl(page, searchQuery, sortQuery, sortType, searchInFieldsPosition, maskWord, this)
}
return if (list.isNullOrEmpty()) {
canNotLoadMoreContent()
} else {
Expand Down

0 comments on commit bde2f4b

Please sign in to comment.