Fix episode list parsing with more comprehensive CSS selectors

This commit is contained in:
tvmon-dev
2026-04-15 20:55:22 +09:00
parent 6e193ca945
commit 91d66896b7

View File

@@ -407,7 +407,7 @@ class TvmonScraper {
}
}
val episodes = mutableListOf<Episode>()
val episodes = mutableListOf<Episode>()
val videoLinks = mutableListOf<VideoLink>()
val seenEpisodeIds = mutableSetOf<String>()
val seenNumbers = mutableSetOf<String>()
@@ -417,7 +417,9 @@ val episodes = mutableListOf<Episode>()
val allEpisodeLinks = doc.select(
".next-ep-list-scroll .ep-item, .ep-item, .bo_v_list li a, #bo_v_list li a, " +
".list_body .item a, .ep-list a, .episode-list a, .ep-link a, " +
"a[href*='/$seriesId/'], a[href*='/${seriesId}/']"
".bo_v_list a, .view-list a, a[href*='/$seriesId/'], a[href*='/${seriesId}/'], " +
".episode-item a, .episodelist a, #episode-list a, .all-episode-list a, " +
".bo_v_title + div a, .list-wrap a[href*='/$seriesId/']"
)
var episodeIndex = 0
@@ -425,6 +427,7 @@ val episodes = mutableListOf<Episode>()
for (link in allEpisodeLinks) {
val href = link.attr("href")
if (href.isBlank()) continue
if (!href.contains("/$seriesId/") && !href.contains("/${seriesId}/")) continue
val fullUrl = resolveUrl(href)
@@ -435,9 +438,16 @@ val episodes = mutableListOf<Episode>()
if (episodeId in seenEpisodeIds) continue
seenEpisodeIds.add(episodeId)
val titleEl = link.selectFirst(".ep-item-title, .item-title, strong, span, .title")
val titleEl = link.selectFirst(".ep-item-title, .item-title, strong, span, .title, .bo_v_title")
val linkText = titleEl?.text()?.trim() ?: link.text().trim()
if (linkText.isBlank()) continue
// Skip anchors whose text is one of the listed menu/control keywords
// (login, movie, drama, variety, search, all, next, play) instead of an
// episode label.
// NOTE(review): the previous condition also tested linkText.contains(""),
// which is always true (every string contains the empty string) and made
// this guard skip every link. If a real keyword was intended there (e.g. a
// character lost to an encoding problem), restore it as a non-empty literal.
if (linkText.contains("로그인") || linkText.contains("영화") ||
    linkText.contains("드라마") || linkText.contains("예능") || linkText.contains("검색") ||
    linkText.contains("전체") || linkText.contains("다음") || linkText.contains("재생")) {
    continue
}
val episodeNumMatch = Pattern.compile("(\\d+)\\s*화|(\\d+)\\s*회|EP\\.?(\\d+)|제\\s*(\\d+)\\s*부").matcher(linkText)
val episodeTitle = if (episodeNumMatch.find()) {
episodeNumMatch.group(1) ?: episodeNumMatch.group(2) ?: episodeNumMatch.group(3) ?: episodeNumMatch.group(4)