Add popular (인기영상) category, parsing up to 40 items
This commit is contained in:
@@ -16,6 +16,7 @@ class TvmonScraper {
|
||||
private const val USER_AGENT = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36"
|
||||
|
||||
val CATEGORIES = mapOf(
|
||||
"popular" to Category("popular", "인기영상", "/popular"),
|
||||
"movie" to Category("movie", "영화", "/movie"),
|
||||
"kor_movie" to Category("kor_movie", "한국영화", "/kor_movie"),
|
||||
"drama" to Category("drama", "드라마", "/drama"),
|
||||
@@ -161,6 +162,18 @@ class TvmonScraper {
|
||||
val items = mutableListOf<Content>()
|
||||
val seen = mutableSetOf<String>()
|
||||
|
||||
// Special handling for /popular - different page structure
|
||||
if (categoryKey == "popular") {
|
||||
val popularItems = parsePopularPage(doc, seen)
|
||||
return@withContext CategoryResult(
|
||||
success = true,
|
||||
category = "인기영상",
|
||||
items = popularItems,
|
||||
page = page,
|
||||
pagination = Pagination(page, 1)
|
||||
)
|
||||
}
|
||||
|
||||
val titleLinks = doc.select("a.title[href*=/$categoryKey/]")
|
||||
|
||||
for (titleLink in titleLinks) {
|
||||
@@ -210,6 +223,48 @@ class TvmonScraper {
|
||||
)
|
||||
}
|
||||
|
||||
/**
 * Collects up to 40 content entries from the "/popular" page.
 *
 * The page is scanned for anchors whose `href` contains one of the known category
 * path segments. An anchor is turned into a [Content] when its href contains a
 * `/<word>/<digits>` segment, it has a visible title of at least two characters,
 * and neither its resolved URL nor its title has been emitted before.
 *
 * @param doc parsed HTML document of the popular page.
 * @param seen resolved URLs that were already emitted; every URL added to the result
 *   is also recorded here, so the caller's set is mutated.
 * @return the collected items in document order, at most 40.
 */
private fun parsePopularPage(doc: org.jsoup.nodes.Document, seen: MutableSet<String>): List<Content> {
    val maxItems = 40
    // Single source of truth for both the CSS selector and the href filter below.
    val categoryPaths = listOf(
        "drama", "movie", "kor_movie", "world", "animation", "ani_movie", "ent", "sisa", "ott_ent"
    )
    val selector = categoryPaths.joinToString(", ") { "a[href*='/$it/']" }
    // Compiled once instead of once per anchor.
    val idRegex = Regex("/(\\w+)/(\\d+)")

    val items = mutableListOf<Content>()
    val seenTitles = mutableSetOf<String>()

    for (link in doc.select(selector)) {
        val href = link.attr("href")
        if (href.isBlank()) continue
        // Case-sensitive re-check of the href against the same category list the selector uses.
        if (categoryPaths.none { href.contains("/$it/") }) continue

        val match = idRegex.find(href) ?: continue
        val (categoryKey, contentId) = match.destructured

        // `seen` stores resolved URLs, so the duplicate check must use the resolved
        // form too; comparing the raw href would miss relative links.
        val fullUrl = resolveUrl(href)
        if (fullUrl in seen) continue

        val title = link.text().trim()
        if (title.length < 2 || title in seenTitles) continue
        seenTitles.add(title)

        // Prefer an image inside the anchor itself; fall back to the parent container
        // so that sibling anchors sharing a parent do not all pick up the first image.
        val img = link.selectFirst("img") ?: link.parent()?.selectFirst("img")
        // jsoup's attr() returns "" (not null) for a missing attribute, so the
        // data-src fallback has to be triggered by blankness rather than by `?:`.
        val imgUrl = img?.let { it.attr("src").ifBlank { it.attr("data-src") } }.orEmpty()
        val thumbnail = when {
            imgUrl.startsWith("http") -> imgUrl
            imgUrl.isNotBlank() -> BASE_URL + imgUrl
            else -> ""
        }

        // Only mark the URL as seen once the item is actually emitted; image-only
        // anchors (blank title) must not block the titled anchor for the same URL.
        seen.add(fullUrl)
        items.add(
            Content(
                id = contentId,
                title = title,
                url = fullUrl,
                thumbnail = thumbnail,
                category = categoryKey
            )
        )

        if (items.size >= maxItems) break
    }
    return items
}
|
||||
|
||||
suspend fun getDetail(urlOrId: String, category: String? = null): ContentDetail = withContext(Dispatchers.IO) {
|
||||
val seriesUrl = extractSeriesUrl(
|
||||
if (urlOrId.startsWith("http")) urlOrId
|
||||
|
||||
Reference in New Issue
Block a user