Replace gdrive item scraper (#1523)

This commit is contained in:
Secozzi
2023-04-21 10:27:03 +02:00
committed by GitHub
parent d998959703
commit 8d4af030da
2 changed files with 152 additions and 40 deletions

View File

@ -6,7 +6,7 @@ ext {
extName = 'Kayoanime'
pkgNameSuffix = 'en.kayoanime'
extClass = '.Kayoanime'
extVersionCode = 2
extVersionCode = 3
libVersion = '13'
}

View File

@ -20,13 +20,12 @@ import eu.kanade.tachiyomi.util.asJsoup
import kotlinx.serialization.Serializable
import kotlinx.serialization.decodeFromString
import kotlinx.serialization.json.Json
import kotlinx.serialization.json.JsonElement
import kotlinx.serialization.json.jsonArray
import kotlinx.serialization.json.jsonPrimitive
import okhttp3.FormBody
import okhttp3.HttpUrl.Companion.toHttpUrl
import okhttp3.MediaType.Companion.toMediaType
import okhttp3.OkHttpClient
import okhttp3.Request
import okhttp3.RequestBody.Companion.toRequestBody
import okhttp3.Response
import org.jsoup.Jsoup
import org.jsoup.nodes.Document
@ -35,6 +34,7 @@ import rx.Observable
import uy.kohesive.injekt.Injekt
import uy.kohesive.injekt.api.get
import uy.kohesive.injekt.injectLazy
import java.security.MessageDigest
import java.text.CharacterIterator
import java.text.SimpleDateFormat
import java.text.StringCharacterIterator
@ -314,59 +314,116 @@ class Kayoanime : ConfigurableAnimeSource, ParsedAnimeHttpSource() {
// ============================== Episodes ==============================
// Lots of code borrowed from https://github.com/yt-dlp/yt-dlp/blob/master/yt_dlp/extractor/googledrive.py under the `GoogleDriveFolderIE` class
override fun episodeListParse(response: Response): List<SEpisode> {
val document = response.asJsoup()
val episodeList = mutableListOf<SEpisode>()
val keyRegex = """"(\w{39})"""".toRegex()
val versionRegex = """"([^"]+web-frontend[^"]+)"""".toRegex()
val jsonRegex = """(?:)\s*(\{(.+)\})\s*(?:)""".toRegex(RegexOption.DOT_MATCHES_ALL)
val boundary = "=====vc17a3rwnndj====="
fun traverseFolder(url: String, path: String, recursionDepth: Int = 0) {
if (recursionDepth == MAX_RECURSION_DEPTH) return
val headers = headers.newBuilder()
val folderId = url.substringAfter("/folders/")
val driveHeaders = headers.newBuilder()
.add("Accept", "*/*")
.add("Connection", "keep-alive")
.add("Cookie", getCookie("https://drive.google.com"))
.add("Host", "drive.google.com")
.build()
val driveDocument = client.newCall(
GET(url, headers = headers.build()),
GET(url, headers = driveHeaders),
).execute().asJsoup()
if (driveDocument.selectFirst("title:contains(Error 404 \\(Not found\\))") != null) return
if (driveDocument.selectFirst("script:containsData(requestAccess)") != null) {
throw Exception("Please log in through webview on google drive & join group")
}
val keyScript = driveDocument.select("script").first { script ->
keyRegex.find(script.data()) != null
}.data()
val key = keyRegex.find(keyScript)?.groupValues?.get(1) ?: ""
val script = driveDocument.selectFirst("script:containsData(_DRIVE_ivd)") ?: return
val data = script.data().substringAfter("['_DRIVE_ivd'] = '").substringBeforeLast("';")
val decoded = Regex("\\\\x([0-9a-fA-F]{2})").replace(data) { matchResult ->
Integer.parseInt(matchResult.groupValues[1], 16).toChar().toString()
}.replace("\\\\\"", "\\\"") // Dirty fix, happens when item names includes `"`
val versionScript = driveDocument.select("script").first { script ->
keyRegex.find(script.data()) != null
}.data()
val driveVersion = versionRegex.find(versionScript)?.groupValues?.get(1) ?: ""
val sapisid = client.cookieJar.loadForRequest("https://drive.google.com".toHttpUrl()).firstOrNull {
it.name == "SAPISID" || it.name == "__Secure-3PAPISID"
}?.value ?: ""
val folderArr = json.decodeFromString<List<JsonElement>>(decoded)
var pageToken: String? = ""
while (pageToken != null) {
val requestUrl = "/drive/v2beta/files?openDrive=true&reason=102&syncType=0&errorRecovery=false&q=trashed%20%3D%20false%20and%20'$folderId'%20in%20parents&fields=kind%2CnextPageToken%2Citems(kind%2CmodifiedDate%2CmodifiedByMeDate%2ClastViewedByMeDate%2CfileSize%2Cowners(kind%2CpermissionId%2Cid)%2ClastModifyingUser(kind%2CpermissionId%2Cid)%2ChasThumbnail%2CthumbnailVersion%2Ctitle%2Cid%2CresourceKey%2Cshared%2CsharedWithMeDate%2CuserPermission(role)%2CexplicitlyTrashed%2CmimeType%2CquotaBytesUsed%2Ccopyable%2CfileExtension%2CsharingUser(kind%2CpermissionId%2Cid)%2Cspaces%2Cversion%2CteamDriveId%2ChasAugmentedPermissions%2CcreatedDate%2CtrashingUser(kind%2CpermissionId%2Cid)%2CtrashedDate%2Cparents(id)%2CshortcutDetails(targetId%2CtargetMimeType%2CtargetLookupStatus)%2Ccapabilities(canCopy%2CcanDownload%2CcanEdit%2CcanAddChildren%2CcanDelete%2CcanRemoveChildren%2CcanShare%2CcanTrash%2CcanRename%2CcanReadTeamDrive%2CcanMoveTeamDriveItem)%2Clabels(starred%2Ctrashed%2Crestricted%2Cviewed))%2CincompleteSearch&appDataFilter=NO_APP_DATA&spaces=drive&pageToken=$pageToken&maxResults=50&supportsTeamDrives=true&includeItemsFromAllDrives=true&corpora=default&orderBy=folder%2Ctitle_natural%20asc&retryCount=0&key=$key HTTP/1.1"
val body = """--$boundary
|content-type: application/http
|content-transfer-encoding: binary
|
|GET $requestUrl
|X-Goog-Drive-Client-Version: $driveVersion
|authorization: ${generateSapisidhashHeader(sapisid)}
|x-goog-authuser: 0
|
|--$boundary
|
""".trimMargin("|").toRequestBody("multipart/mixed; boundary=\"$boundary\"".toMediaType())
folderArr.first().jsonArray.forEachIndexed { index, item ->
val size = item.jsonArray.getOrNull(13)?.let { t -> formatBytes(t.toString().toLongOrNull()) }
val name = item.jsonArray.getOrNull(2)?.jsonPrimitive?.content ?: "Name unavailable"
val id = item.jsonArray.getOrNull(0)?.jsonPrimitive?.content ?: ""
val type = item.jsonArray.getOrNull(3)?.jsonPrimitive?.content ?: "Unknown type"
if (type.startsWith("video")) {
val episode = SEpisode.create()
episode.scanlator = if (preferences.getBoolean("scanlator_order", false)) {
"/${path.trim()}$size"
} else {
"$size • /${path.trim()}"
val postUrl = "https://clients6.google.com/batch/drive/v2beta".toHttpUrl().newBuilder()
.addQueryParameter("${'$'}ct", "multipart/mixed;boundary=\"$boundary\"")
.addQueryParameter("key", key)
.build()
.toString()
val postHeaders = headers.newBuilder()
.add("Content-Type", "text/plain; charset=UTF-8")
.add("Origin", "https://drive.google.com")
.add("Cookie", getCookie("https://drive.google.com"))
.build()
val response = client.newCall(
POST(postUrl, body = body, headers = postHeaders),
).execute()
val parsed = json.decodeFromString<GDrivePostResponse>(
jsonRegex.find(response.body.string())!!.groupValues[1],
)
if (parsed.items == null) throw Exception("Failed to load items, please log in through webview")
parsed.items.forEachIndexed { index, it ->
if (it.mimeType.startsWith("video")) {
val episode = SEpisode.create()
val size = formatBytes(it.fileSize?.toLongOrNull())
val pathName = if (preferences.getBoolean("trim_info", false)) {
path.trimInfo()
} else {
path
}
val itemNumberRegex = """ - (?:S\d+E)?(\d+)""".toRegex()
episode.scanlator = if (preferences.getBoolean("scanlator_order", false)) {
"/$pathName$size"
} else {
"$size • /$pathName"
}
// Fall back to `true` when "trim_episode" has never been stored, so the
// behavior matches the settings switch for this key, which is declared
// with setDefaultValue(true).
episode.name = if (preferences.getBoolean("trim_episode", true)) {
    it.title.trimInfo()
} else {
    it.title
}
episode.url = "https://drive.google.com/uc?id=${it.id}"
episode.episode_number = itemNumberRegex.find(it.title.trimInfo())?.groupValues?.get(1)?.toFloatOrNull() ?: index.toFloat()
episode.date_upload = -1L
episodeList.add(episode)
}
if (it.mimeType.endsWith(".folder")) {
traverseFolder(
"https://drive.google.com/drive/folders/${it.id}",
"$path/${it.title}",
recursionDepth + 1,
)
}
episode.name = name.removePrefix("[Kayoanime] ")
episode.url = "https://drive.google.com/uc?id=$id"
episode.episode_number = index.toFloat()
episode.date_upload = -1L
episodeList.add(episode)
}
if (type.endsWith(".folder")) {
traverseFolder(
"https://drive.google.com/drive/folders/$id",
"$path/$name",
recursionDepth + 1,
)
}
pageToken = parsed.nextPageToken
}
}
@ -374,7 +431,7 @@ class Kayoanime : ConfigurableAnimeSource, ParsedAnimeHttpSource() {
getVideoPathsFromElement(t)
}.forEach { season ->
season.select("a[href*=drive.google.com]").distinctBy { it.text() }.forEach {
val url = it.selectFirst("a[href*=drive.google.com]")!!.attr("href").substringBeforeLast("?usp=share_link")
val url = it.selectFirst("a[href*=drive.google.com]")!!.attr("href").substringBeforeLast("?usp=shar")
traverseFolder(url, getVideoPathsFromElement(season) + " " + it.text())
}
}
@ -394,7 +451,6 @@ class Kayoanime : ConfigurableAnimeSource, ParsedAnimeHttpSource() {
preferences.getBoolean("scanlator_order", false),
),
)
// getVideoPathsFromElement(season) + " " + it.text()
}
}
}
@ -436,6 +492,44 @@ class Kayoanime : ConfigurableAnimeSource, ParsedAnimeHttpSource() {
// ============================= Utilities ==============================
// https://github.com/yt-dlp/yt-dlp/blob/8f0be90ecb3b8d862397177bb226f17b245ef933/yt_dlp/extractor/youtube.py#L573
/**
 * Builds the value `SAPISIDHASH <seconds>_<hex>` used as the `authorization`
 * line of the batched Drive request.
 *
 * `<seconds>` is the current Unix time in whole seconds and `<hex>` is the
 * lowercase hex SHA-1 digest of "`<seconds> <SAPISID> <origin>`".
 * Algorithm reference: https://stackoverflow.com/a/32065323
 *
 * @param SAPISID value of the SAPISID cookie.
 * @param origin origin string mixed into the hash.
 * @return the header value.
 */
private fun generateSapisidhashHeader(SAPISID: String, origin: String = "https://drive.google.com"): String {
    val epochSeconds = System.currentTimeMillis() / 1000
    val payload = "$epochSeconds $SAPISID $origin"
    val sha1 = MessageDigest.getInstance("SHA-1")
    val digestBytes = sha1.digest(payload.toByteArray())
    // Two lowercase hex characters per digest byte.
    val hexDigest = digestBytes.joinToString(separator = "") { byte -> "%02x".format(byte) }
    return "SAPISIDHASH ${epochSeconds}_$hexDigest"
}
/**
 * Model the JSON body extracted from the Drive batch listing response is
 * decoded into.
 *
 * Both properties default to `null`, so they are optional when decoding.
 */
@Serializable
data class GDrivePostResponse(
// Token for the next page; the listing loop keeps requesting pages until this is null.
val nextPageToken: String? = null,
// Entries of the listed folder; the caller throws a "please log in" error when this is null.
val items: List<ResponseItem>? = null,
) {
/**
 * One entry of a folder listing.
 */
@Serializable
data class ResponseItem(
// Used to build both the `uc?id=` episode URL and the sub-folder URL.
val id: String,
// Display name; becomes the episode name or the next path segment when recursing.
val title: String,
// Checked for a "video" prefix (episode) or a ".folder" suffix (recurse).
val mimeType: String,
// Size as a string; parsed with toLongOrNull() before being formatted.
val fileSize: String? = null,
)
}
/**
 * Strips decoration from a file or folder name.
 *
 * Removes one leading `[tag] ` prefix, then repeatedly removes a trailing
 * `[...]` or `(...)` group (optionally preceded by a space), keeping a
 * `.mkv`/`.mp4`/`.avi` extension that follows it. The result is trimmed.
 */
private fun String.trimInfo(): String {
    val leadingTag = """^\[\w+\] """.toRegex()
    val trailingTag = """( ?\[[\s\w-]+\]| ?\([\s\w-]+\))(\.mkv|\.mp4|\.avi)?${'$'}""".toRegex()

    var result = leadingTag.replaceFirst(this, "")
    while (true) {
        if (!trailingTag.containsMatchIn(result)) break
        // Drop the bracketed group but keep the extension (group 2) if present.
        result = trailingTag.replace(result) { match ->
            val extension = match.groups[2]
            if (extension != null) extension.value else ""
        }
    }
    return result.trim()
}
private fun getIndexVideoUrl(url: String): List<Video> {
val doc = client.newCall(
GET("$url?a=view"),
@ -518,7 +612,25 @@ class Kayoanime : ConfigurableAnimeSource, ParsedAnimeHttpSource() {
preferences.edit().putBoolean(key, newValue as Boolean).commit()
}
}
val trimEpisodeName = SwitchPreferenceCompat(screen.context).apply {
key = "trim_episode"
title = "Trim info from episode name"
setDefaultValue(true)
setOnPreferenceChangeListener { _, newValue ->
preferences.edit().putBoolean(key, newValue as Boolean).commit()
}
}
val trimEpisodeInfo = SwitchPreferenceCompat(screen.context).apply {
key = "trim_info"
title = "Trim info from episode info"
setDefaultValue(false)
setOnPreferenceChangeListener { _, newValue ->
preferences.edit().putBoolean(key, newValue as Boolean).commit()
}
}
screen.addPreference(scanlatorOrder)
screen.addPreference(trimEpisodeName)
screen.addPreference(trimEpisodeInfo)
}
}