Add popular (인기영상) category with 40 items parsing
This commit is contained in:
@@ -16,6 +16,7 @@ class TvmonScraper {
|
|||||||
private const val USER_AGENT = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36"
|
private const val USER_AGENT = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36"
|
||||||
|
|
||||||
val CATEGORIES = mapOf(
|
val CATEGORIES = mapOf(
|
||||||
|
"popular" to Category("popular", "인기영상", "/popular"),
|
||||||
"movie" to Category("movie", "영화", "/movie"),
|
"movie" to Category("movie", "영화", "/movie"),
|
||||||
"kor_movie" to Category("kor_movie", "한국영화", "/kor_movie"),
|
"kor_movie" to Category("kor_movie", "한국영화", "/kor_movie"),
|
||||||
"drama" to Category("drama", "드라마", "/drama"),
|
"drama" to Category("drama", "드라마", "/drama"),
|
||||||
@@ -161,6 +162,18 @@ class TvmonScraper {
|
|||||||
val items = mutableListOf<Content>()
|
val items = mutableListOf<Content>()
|
||||||
val seen = mutableSetOf<String>()
|
val seen = mutableSetOf<String>()
|
||||||
|
|
||||||
|
// Special handling for /popular - different page structure
|
||||||
|
if (categoryKey == "popular") {
|
||||||
|
val popularItems = parsePopularPage(doc, seen)
|
||||||
|
return@withContext CategoryResult(
|
||||||
|
success = true,
|
||||||
|
category = "인기영상",
|
||||||
|
items = popularItems,
|
||||||
|
page = page,
|
||||||
|
pagination = Pagination(page, 1)
|
||||||
|
)
|
||||||
|
}
|
||||||
|
|
||||||
val titleLinks = doc.select("a.title[href*=/$categoryKey/]")
|
val titleLinks = doc.select("a.title[href*=/$categoryKey/]")
|
||||||
|
|
||||||
for (titleLink in titleLinks) {
|
for (titleLink in titleLinks) {
|
||||||
@@ -210,6 +223,48 @@ class TvmonScraper {
|
|||||||
)
|
)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
 * Parses the "popular" listing page, whose anchors point into several different
 * category paths rather than a single one.
 *
 * Every anchor whose href contains one of the known category path segments is
 * considered. The category key and numeric content id are taken from the first
 * `/<word>/<digits>` pair found in the href. Anchors are skipped when the href is
 * blank, has no such pair, was already emitted (by URL or by title), or has link
 * text shorter than two characters. Collection stops after 40 items.
 *
 * @param doc parsed popular page.
 * @param seen URLs already emitted; the resolved URL of every accepted item is added.
 * @return the collected items, in document order, at most 40.
 */
private fun parsePopularPage(doc: org.jsoup.nodes.Document, seen: MutableSet<String>): List<Content> {
    val maxItems = 40

    // Single source of truth for both the CSS selector and the href filter below.
    val categoryPaths = listOf(
        "/drama/", "/movie/", "/kor_movie/", "/world/", "/animation/",
        "/ani_movie/", "/ent/", "/sisa/", "/ott_ent/",
    )
    val selector = categoryPaths.joinToString(", ") { "a[href*='$it']" }

    // Compiled once instead of once per anchor.
    val idPattern = Regex("""/(\w+)/(\d+)""")

    val items = mutableListOf<Content>()
    val seenTitles = mutableSetOf<String>()

    for (link in doc.select(selector)) {
        val href = link.attr("href")
        // Case-sensitive re-check: the selector's substring match may be laxer than this one.
        if (href.isBlank() || categoryPaths.none { it in href }) continue

        val (categoryKey, contentId) = idPattern.find(href)?.destructured ?: continue

        // `seen` stores resolved URLs, so the resolved form must be checked as well;
        // the raw href is still checked in case it is already absolute.
        // NOTE(review): assumes resolveUrl turns a site-relative href into an absolute URL - confirm.
        val fullUrl = resolveUrl(href)
        if (href in seen || fullUrl in seen) continue

        // After trim() a blank title is empty, so the length check also covers blank text.
        val title = link.text().trim()
        if (title.length < 2 || title in seenTitles) continue

        // Jsoup's attr() yields "" (not null) for a missing attribute, so the data-src
        // fallback has to test for blank rather than rely on the elvis operator.
        val img = link.parent()?.selectFirst("img") ?: link.selectFirst("img")
        val rawThumbnail = img?.let { el -> el.attr("src").ifBlank { el.attr("data-src") } }.orEmpty()
        val thumbnail = when {
            rawThumbnail.isBlank() -> ""
            rawThumbnail.startsWith("http") -> rawThumbnail
            // Protocol-relative URL: it already carries a host, so only a scheme is missing.
            rawThumbnail.startsWith("//") -> "https:$rawThumbnail"
            else -> BASE_URL + rawThumbnail
        }

        seenTitles.add(title)
        seen.add(fullUrl)
        items.add(
            Content(
                id = contentId,
                title = title,
                url = fullUrl,
                thumbnail = thumbnail,
                category = categoryKey,
            )
        )

        if (items.size >= maxItems) break
    }

    return items
}
|
||||||
|
|
||||||
suspend fun getDetail(urlOrId: String, category: String? = null): ContentDetail = withContext(Dispatchers.IO) {
|
suspend fun getDetail(urlOrId: String, category: String? = null): ContentDetail = withContext(Dispatchers.IO) {
|
||||||
val seriesUrl = extractSeriesUrl(
|
val seriesUrl = extractSeriesUrl(
|
||||||
if (urlOrId.startsWith("http")) urlOrId
|
if (urlOrId.startsWith("http")) urlOrId
|
||||||
|
|||||||
Reference in New Issue
Block a user