Replace gdrive item scraper (#1523)

This commit is contained in:
Secozzi
2023-04-21 10:27:03 +02:00
committed by GitHub
parent d998959703
commit 8d4af030da
2 changed files with 152 additions and 40 deletions

View File

@ -6,7 +6,7 @@ ext {
extName = 'Kayoanime' extName = 'Kayoanime'
pkgNameSuffix = 'en.kayoanime' pkgNameSuffix = 'en.kayoanime'
extClass = '.Kayoanime' extClass = '.Kayoanime'
extVersionCode = 2 extVersionCode = 3
libVersion = '13' libVersion = '13'
} }

View File

@ -20,13 +20,12 @@ import eu.kanade.tachiyomi.util.asJsoup
import kotlinx.serialization.Serializable import kotlinx.serialization.Serializable
import kotlinx.serialization.decodeFromString import kotlinx.serialization.decodeFromString
import kotlinx.serialization.json.Json import kotlinx.serialization.json.Json
import kotlinx.serialization.json.JsonElement
import kotlinx.serialization.json.jsonArray
import kotlinx.serialization.json.jsonPrimitive
import okhttp3.FormBody import okhttp3.FormBody
import okhttp3.HttpUrl.Companion.toHttpUrl import okhttp3.HttpUrl.Companion.toHttpUrl
import okhttp3.MediaType.Companion.toMediaType
import okhttp3.OkHttpClient import okhttp3.OkHttpClient
import okhttp3.Request import okhttp3.Request
import okhttp3.RequestBody.Companion.toRequestBody
import okhttp3.Response import okhttp3.Response
import org.jsoup.Jsoup import org.jsoup.Jsoup
import org.jsoup.nodes.Document import org.jsoup.nodes.Document
@ -35,6 +34,7 @@ import rx.Observable
import uy.kohesive.injekt.Injekt import uy.kohesive.injekt.Injekt
import uy.kohesive.injekt.api.get import uy.kohesive.injekt.api.get
import uy.kohesive.injekt.injectLazy import uy.kohesive.injekt.injectLazy
import java.security.MessageDigest
import java.text.CharacterIterator import java.text.CharacterIterator
import java.text.SimpleDateFormat import java.text.SimpleDateFormat
import java.text.StringCharacterIterator import java.text.StringCharacterIterator
@ -314,59 +314,116 @@ class Kayoanime : ConfigurableAnimeSource, ParsedAnimeHttpSource() {
// ============================== Episodes ============================== // ============================== Episodes ==============================
// Lots of code borrowed from https://github.com/yt-dlp/yt-dlp/blob/master/yt_dlp/extractor/googledrive.py under the `GoogleDriveFolderIE` class
override fun episodeListParse(response: Response): List<SEpisode> { override fun episodeListParse(response: Response): List<SEpisode> {
val document = response.asJsoup() val document = response.asJsoup()
val episodeList = mutableListOf<SEpisode>() val episodeList = mutableListOf<SEpisode>()
val keyRegex = """"(\w{39})"""".toRegex()
val versionRegex = """"([^"]+web-frontend[^"]+)"""".toRegex()
val jsonRegex = """(?:)\s*(\{(.+)\})\s*(?:)""".toRegex(RegexOption.DOT_MATCHES_ALL)
val boundary = "=====vc17a3rwnndj====="
/**
 * Walks one Google Drive folder and appends an episode to `episodeList` for every video
 * file it contains, recursing into sub-folders.
 *
 * The folder page is fetched first to scrape the API key and the web-frontend client
 * version out of its inline scripts; the folder content is then listed page by page
 * through the `batch/drive/v2beta` endpoint until no `nextPageToken` is returned.
 *
 * @param url link to the Drive folder (`.../folders/<id>`).
 * @param path display path accumulated so far; shown in the episode's scanlator field.
 * @param recursionDepth current nesting level; traversal stops at `MAX_RECURSION_DEPTH`.
 * @throws NoSuchElementException if no script on the folder page contains an API key.
 * @throws Exception if the API response cannot be parsed or contains no `items`.
 */
fun traverseFolder(url: String, path: String, recursionDepth: Int = 0) {
    if (recursionDepth == MAX_RECURSION_DEPTH) return

    // Keep only the bare id: a leftover query string would corrupt the `'<id>' in parents` filter.
    val folderId = url.substringAfter("/folders/").substringBefore("?")
    val driveHeaders = headers.newBuilder()
        .add("Accept", "*/*")
        .add("Connection", "keep-alive")
        .add("Cookie", getCookie("https://drive.google.com"))
        .add("Host", "drive.google.com")
        .build()

    val driveDocument = client.newCall(
        GET(url, headers = driveHeaders),
    ).execute().asJsoup()

    if (driveDocument.selectFirst("title:contains(Error 404 \\(Not found\\))") != null) return

    val scripts = driveDocument.select("script")

    val keyScript = scripts.first { script ->
        keyRegex.find(script.data()) != null
    }.data()
    val key = keyRegex.find(keyScript)?.groupValues?.get(1) ?: ""

    // Search with versionRegex (the original reused keyRegex here, so the version was only
    // found when it happened to live in the same script as the key). Falls back to "".
    val driveVersion = scripts.asSequence()
        .mapNotNull { script -> versionRegex.find(script.data())?.groupValues?.get(1) }
        .firstOrNull()
        ?: ""

    val sapisid = client.cookieJar.loadForRequest("https://drive.google.com".toHttpUrl()).firstOrNull {
        it.name == "SAPISID" || it.name == "__Secure-3PAPISID"
    }?.value ?: ""

    // Compiled once per folder instead of once per listed item.
    val itemNumberRegex = """ - (?:S\d+E)?(\d+)""".toRegex()

    // An empty token requests the first page; null means there are no more pages.
    var pageToken: String? = ""
    while (pageToken != null) {
        val requestUrl = "/drive/v2beta/files?openDrive=true&reason=102&syncType=0&errorRecovery=false&q=trashed%20%3D%20false%20and%20'$folderId'%20in%20parents&fields=kind%2CnextPageToken%2Citems(kind%2CmodifiedDate%2CmodifiedByMeDate%2ClastViewedByMeDate%2CfileSize%2Cowners(kind%2CpermissionId%2Cid)%2ClastModifyingUser(kind%2CpermissionId%2Cid)%2ChasThumbnail%2CthumbnailVersion%2Ctitle%2Cid%2CresourceKey%2Cshared%2CsharedWithMeDate%2CuserPermission(role)%2CexplicitlyTrashed%2CmimeType%2CquotaBytesUsed%2Ccopyable%2CfileExtension%2CsharingUser(kind%2CpermissionId%2Cid)%2Cspaces%2Cversion%2CteamDriveId%2ChasAugmentedPermissions%2CcreatedDate%2CtrashingUser(kind%2CpermissionId%2Cid)%2CtrashedDate%2Cparents(id)%2CshortcutDetails(targetId%2CtargetMimeType%2CtargetLookupStatus)%2Ccapabilities(canCopy%2CcanDownload%2CcanEdit%2CcanAddChildren%2CcanDelete%2CcanRemoveChildren%2CcanShare%2CcanTrash%2CcanRename%2CcanReadTeamDrive%2CcanMoveTeamDriveItem)%2Clabels(starred%2Ctrashed%2Crestricted%2Cviewed))%2CincompleteSearch&appDataFilter=NO_APP_DATA&spaces=drive&pageToken=$pageToken&maxResults=50&supportsTeamDrives=true&includeItemsFromAllDrives=true&corpora=default&orderBy=folder%2Ctitle_natural%20asc&retryCount=0&key=$key HTTP/1.1"
        val body = """--$boundary
            |content-type: application/http
            |content-transfer-encoding: binary
            |
            |GET $requestUrl
            |X-Goog-Drive-Client-Version: $driveVersion
            |authorization: ${generateSapisidhashHeader(sapisid)}
            |x-goog-authuser: 0
            |
            |--$boundary
            |
        """.trimMargin("|").toRequestBody("multipart/mixed; boundary=\"$boundary\"".toMediaType())

        val postUrl = "https://clients6.google.com/batch/drive/v2beta".toHttpUrl().newBuilder()
            .addQueryParameter("${'$'}ct", "multipart/mixed;boundary=\"$boundary\"")
            .addQueryParameter("key", key)
            .build()
            .toString()

        val postHeaders = headers.newBuilder()
            .add("Content-Type", "text/plain; charset=UTF-8")
            .add("Origin", "https://drive.google.com")
            .add("Cookie", getCookie("https://drive.google.com"))
            .build()

        val batchResponse = client.newCall(
            POST(postUrl, body = body, headers = postHeaders),
        ).execute()

        // The JSON payload is wrapped in a multipart envelope; jsonRegex cuts out the object.
        val payload = jsonRegex.find(batchResponse.body.string())?.groupValues?.get(1)
            ?: throw Exception("Failed to parse Google Drive response")
        val parsed = json.decodeFromString<GDrivePostResponse>(payload)
        val items = parsed.items
            ?: throw Exception("Failed to load items, please log in through webview")

        items.forEachIndexed { index, item ->
            if (item.mimeType.startsWith("video")) {
                val episode = SEpisode.create()
                val size = formatBytes(item.fileSize?.toLongOrNull())
                val pathName = if (preferences.getBoolean("trim_info", false)) {
                    path.trimInfo()
                } else {
                    path
                }

                // NOTE(review): the first branch has no separator between path and size,
                // unlike the second one — confirm whether that is intentional.
                episode.scanlator = if (preferences.getBoolean("scanlator_order", false)) {
                    "/$pathName$size"
                } else {
                    "$size • /$pathName"
                }
                episode.name = if (preferences.getBoolean("trim_episode", false)) {
                    item.title.trimInfo()
                } else {
                    item.title
                }
                episode.url = "https://drive.google.com/uc?id=${item.id}"
                // Prefer the number embedded in the title; fall back to the position in the page.
                episode.episode_number = itemNumberRegex.find(item.title.trimInfo())
                    ?.groupValues?.get(1)?.toFloatOrNull()
                    ?: index.toFloat()
                episode.date_upload = -1L
                episodeList.add(episode)
            }
            if (item.mimeType.endsWith(".folder")) {
                traverseFolder(
                    "https://drive.google.com/drive/folders/${item.id}",
                    "$path/${item.title}",
                    recursionDepth + 1,
                )
            }
        }

        pageToken = parsed.nextPageToken
    }
}
@ -374,7 +431,7 @@ class Kayoanime : ConfigurableAnimeSource, ParsedAnimeHttpSource() {
getVideoPathsFromElement(t) getVideoPathsFromElement(t)
}.forEach { season -> }.forEach { season ->
season.select("a[href*=drive.google.com]").distinctBy { it.text() }.forEach { season.select("a[href*=drive.google.com]").distinctBy { it.text() }.forEach {
val url = it.selectFirst("a[href*=drive.google.com]")!!.attr("href").substringBeforeLast("?usp=share_link") val url = it.selectFirst("a[href*=drive.google.com]")!!.attr("href").substringBeforeLast("?usp=shar")
traverseFolder(url, getVideoPathsFromElement(season) + " " + it.text()) traverseFolder(url, getVideoPathsFromElement(season) + " " + it.text())
} }
} }
@ -394,7 +451,6 @@ class Kayoanime : ConfigurableAnimeSource, ParsedAnimeHttpSource() {
preferences.getBoolean("scanlator_order", false), preferences.getBoolean("scanlator_order", false),
), ),
) )
// getVideoPathsFromElement(season) + " " + it.text()
} }
} }
} }
@ -436,6 +492,44 @@ class Kayoanime : ConfigurableAnimeSource, ParsedAnimeHttpSource() {
// ============================= Utilities ============================== // ============================= Utilities ==============================
/**
 * Builds a `SAPISIDHASH` authorization value for the given cookie and origin.
 *
 * The value is `SAPISIDHASH <seconds>_<hex>`, where `<seconds>` is the current Unix time
 * in seconds and `<hex>` is the lowercase hex SHA-1 digest of `"<seconds> <SAPISID> <origin>"`.
 *
 * Ported from
 * https://github.com/yt-dlp/yt-dlp/blob/8f0be90ecb3b8d862397177bb226f17b245ef933/yt_dlp/extractor/youtube.py#L573
 * (algorithm described at https://stackoverflow.com/a/32065323).
 *
 * @param SAPISID value of the `SAPISID` cookie.
 * @param origin origin the hash is bound to.
 * @return the complete header value.
 */
private fun generateSapisidhashHeader(SAPISID: String, origin: String = "https://drive.google.com"): String {
    val epochSeconds = System.currentTimeMillis() / 1000
    val digestBytes = MessageDigest.getInstance("SHA-1")
        .digest("$epochSeconds $SAPISID $origin".toByteArray())

    // Two lowercase hex characters per digest byte.
    val hexDigest = buildString(digestBytes.size * 2) {
        for (byte in digestBytes) append("%02x".format(byte))
    }
    return "SAPISIDHASH ${epochSeconds}_$hexDigest"
}
/**
 * JSON model that `traverseFolder` decodes from the Drive batch listing response.
 * Property names double as the JSON keys, so they must not be renamed.
 */
@Serializable
data class GDrivePostResponse(
// Token passed back as `pageToken` for the next request; null ends the paging loop.
val nextPageToken: String? = null,
// Listed entries; when null the caller throws and asks the user to log in via webview.
val items: List<ResponseItem>? = null,
) {
/** One entry (file or folder) of the listed folder. */
@Serializable
data class ResponseItem(
// Used to build the `uc?id=` episode URL and the sub-folder URL.
val id: String,
// Becomes the episode name and is appended to the path when recursing.
val title: String,
// Entries starting with "video" become episodes; entries ending in ".folder" are recursed into.
val mimeType: String,
// Numeric string parsed with `toLongOrNull()` before being passed to `formatBytes`; may be absent.
val fileSize: String? = null,
)
}
/**
 * Strips release metadata from a file or folder name.
 *
 * Removes one leading `[tag] ` prefix, then repeatedly removes a trailing `[...]` or `(...)`
 * group (keeping a `.mkv`/`.mp4`/`.avi` extension that follows it) until none is left,
 * and finally trims surrounding whitespace.
 */
private fun String.trimInfo(): String {
    val leadingTag = """^\[\w+\] """.toRegex()
    val trailingTag = """( ?\[[\s\w-]+\]| ?\([\s\w-]+\))(\.mkv|\.mp4|\.avi)?${'$'}""".toRegex()

    // Peel trailing tags off one at a time; group 2 is the optional file extension to keep.
    tailrec fun stripTrailing(text: String): String =
        if (!trailingTag.containsMatchIn(text)) {
            text
        } else {
            stripTrailing(trailingTag.replace(text) { match -> match.groups[2]?.value.orEmpty() })
        }

    return stripTrailing(replaceFirst(leadingTag, "")).trim()
}
private fun getIndexVideoUrl(url: String): List<Video> { private fun getIndexVideoUrl(url: String): List<Video> {
val doc = client.newCall( val doc = client.newCall(
GET("$url?a=view"), GET("$url?a=view"),
@ -518,7 +612,25 @@ class Kayoanime : ConfigurableAnimeSource, ParsedAnimeHttpSource() {
preferences.edit().putBoolean(key, newValue as Boolean).commit() preferences.edit().putBoolean(key, newValue as Boolean).commit()
} }
} }
val trimEpisodeName = SwitchPreferenceCompat(screen.context).apply {
key = "trim_episode"
title = "Trim info from episode name"
setDefaultValue(true)
setOnPreferenceChangeListener { _, newValue ->
preferences.edit().putBoolean(key, newValue as Boolean).commit()
}
}
val trimEpisodeInfo = SwitchPreferenceCompat(screen.context).apply {
key = "trim_info"
title = "Trim info from episode info"
setDefaultValue(false)
setOnPreferenceChangeListener { _, newValue ->
preferences.edit().putBoolean(key, newValue as Boolean).commit()
}
}
screen.addPreference(scanlatorOrder) screen.addPreference(scanlatorOrder)
screen.addPreference(trimEpisodeName)
screen.addPreference(trimEpisodeInfo)
} }
} }