Fix episode list parsing with more comprehensive CSS selectors
This commit is contained in:
@@ -407,7 +407,7 @@ class TvmonScraper {
|
||||
}
|
||||
}
|
||||
|
||||
val episodes = mutableListOf<Episode>()
|
||||
val episodes = mutableListOf<Episode>()
|
||||
val videoLinks = mutableListOf<VideoLink>()
|
||||
val seenEpisodeIds = mutableSetOf<String>()
|
||||
val seenNumbers = mutableSetOf<String>()
|
||||
@@ -417,7 +417,9 @@ val episodes = mutableListOf<Episode>()
|
||||
val allEpisodeLinks = doc.select(
|
||||
".next-ep-list-scroll .ep-item, .ep-item, .bo_v_list li a, #bo_v_list li a, " +
|
||||
".list_body .item a, .ep-list a, .episode-list a, .ep-link a, " +
|
||||
"a[href*='/$seriesId/'], a[href*='/${seriesId}/']"
|
||||
".bo_v_list a, .view-list a, a[href*='/$seriesId/'], a[href*='/${seriesId}/'], " +
|
||||
".episode-item a, .episodelist a, #episode-list a, .all-episode-list a, " +
|
||||
".bo_v_title + div a, .list-wrap a[href*='/$seriesId/']"
|
||||
)
|
||||
|
||||
var episodeIndex = 0
|
||||
@@ -425,6 +427,7 @@ val episodes = mutableListOf<Episode>()
|
||||
for (link in allEpisodeLinks) {
|
||||
val href = link.attr("href")
|
||||
if (href.isBlank()) continue
|
||||
if (!href.contains("/$seriesId/") && !href.contains("/${seriesId}/")) continue
|
||||
|
||||
val fullUrl = resolveUrl(href)
|
||||
|
||||
@@ -435,9 +438,16 @@ val episodes = mutableListOf<Episode>()
|
||||
if (episodeId in seenEpisodeIds) continue
|
||||
seenEpisodeIds.add(episodeId)
|
||||
|
||||
val titleEl = link.selectFirst(".ep-item-title, .item-title, strong, span, .title")
|
||||
val titleEl = link.selectFirst(".ep-item-title, .item-title, strong, span, .title, .bo_v_title")
|
||||
val linkText = titleEl?.text()?.trim() ?: link.text().trim()
|
||||
|
||||
if (linkText.isBlank()) continue
|
||||
if (linkText.contains("로그인") || linkText.contains("홈") || linkText.contains("영화") ||
|
||||
linkText.contains("드라마") || linkText.contains("예능") || linkText.contains("검색") ||
|
||||
linkText.contains("전체") || linkText.contains("다음") || linkText.contains("재생")) {
|
||||
continue
|
||||
}
|
||||
|
||||
val episodeNumMatch = Pattern.compile("(\\d+)\\s*화|(\\d+)\\s*회|EP\\.?(\\d+)|제\\s*(\\d+)\\s*부").matcher(linkText)
|
||||
val episodeTitle = if (episodeNumMatch.find()) {
|
||||
episodeNumMatch.group(1) ?: episodeNumMatch.group(2) ?: episodeNumMatch.group(3) ?: episodeNumMatch.group(4)
|
||||
|
||||
Reference in New Issue
Block a user