Skip to content

Commit

Permalink
Merge pull request #1108 from AudricV/yt_refactor-js-usage
Browse files Browse the repository at this point in the history
[YouTube] Refactor JavaScript usage and fix extraction of obfuscated signature deobfuscation function
  • Loading branch information
Stypox authored Sep 22, 2023
2 parents 3be76a6 + 6ed2209 commit 289db11
Show file tree
Hide file tree
Showing 119 changed files with 1,655 additions and 1,319 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -10,18 +10,14 @@
import org.schabi.newpipe.extractor.utils.Parser;

import javax.annotation.Nonnull;
import java.net.MalformedURLException;
import java.net.URL;
import java.util.regex.Pattern;

/**
* The extractor of YouTube's base JavaScript player file.
*
* <p>
* YouTube restricts streaming of its media in multiple ways by requiring its HTML5 clients to use
* a signature timestamp and, on streaming URLs, a signature deobfuscation function for some
* contents and a throttling parameter deobfuscation function for all contents.
* </p>
*
* <p>
* This class handles fetching of this base JavaScript player file in order to allow other classes
* to extract the needed data.
* </p>
Expand All @@ -31,7 +27,7 @@
* watch page as a fallback.
* </p>
*/
public final class YoutubeJavaScriptExtractor {
final class YoutubeJavaScriptExtractor {

private static final String HTTPS = "https:";
private static final String BASE_JS_PLAYER_URL_FORMAT =
Expand All @@ -40,49 +36,45 @@ public final class YoutubeJavaScriptExtractor {
"player\\\\/([a-z0-9]{8})\\\\/");
private static final Pattern EMBEDDED_WATCH_PAGE_JS_BASE_PLAYER_URL_PATTERN = Pattern.compile(
"\"jsUrl\":\"(/s/player/[A-Za-z0-9]+/player_ias\\.vflset/[A-Za-z_-]+/base\\.js)\"");
private static String cachedJavaScriptCode;

// Private constructor: every member visible in this class is static, so no instance is needed.
private YoutubeJavaScriptExtractor() {
}

/**
* Extracts the JavaScript file.
* Extracts the JavaScript base player file.
*
* <p>
* The result is cached, so subsequent calls use the result of previous calls.
* </p>
*
* @param videoId a YouTube video ID, which doesn't influence the result, but it may help in
* the chance that YouTube track it
* @return the whole JavaScript file as a string
* @throws ParsingException if the extraction failed
* @param videoId the video ID used to get the JavaScript base player file (an empty one can be
* passed, even though this is not recommended in order to better spoof official YouTube
* clients)
* @return the whole JavaScript base player file as a string
* @throws ParsingException if the extraction of the file failed
*/
// NOTE(review): this span is rendered from a diff and appears to interleave the removed, cached
// implementation (extractJavaScriptCode / cachedJavaScriptCode / resetJavaScriptCode) with the
// added, cache-free one (extractJavaScriptPlayerCode) - confirm against the repository before
// relying on the exact statement order shown here.
@Nonnull
public static String extractJavaScriptCode(@Nonnull final String videoId)
static String extractJavaScriptPlayerCode(@Nonnull final String videoId)
throws ParsingException {
if (cachedJavaScriptCode == null) {
String url;
try {
url = YoutubeJavaScriptExtractor.extractJavaScriptUrlWithIframeResource();
} catch (final Exception e) {
url = YoutubeJavaScriptExtractor.extractJavaScriptUrlWithEmbedWatchPage(videoId);
}
String url;
try {
url = YoutubeJavaScriptExtractor.extractJavaScriptUrlWithIframeResource();
final String playerJsUrl = YoutubeJavaScriptExtractor.cleanJavaScriptUrl(url);
cachedJavaScriptCode = YoutubeJavaScriptExtractor.downloadJavaScriptCode(playerJsUrl);
}

return cachedJavaScriptCode;
}
// Assert that the URL we extracted and built is valid
// (the URL object itself is discarded; only the MalformedURLException it may throw matters)
new URL(playerJsUrl);

/**
* Reset the cached JavaScript code.
*
* <p>
* It will be fetched again the next time {@link #extractJavaScriptCode(String)} is called.
* </p>
*/
public static void resetJavaScriptCode() {
cachedJavaScriptCode = null;
return YoutubeJavaScriptExtractor.downloadJavaScriptCode(playerJsUrl);
} catch (final Exception e) {
// Reached when anything in the preceding try block throws: retry with the URL taken from the
// embed watch page of the given video
url = YoutubeJavaScriptExtractor.extractJavaScriptUrlWithEmbedWatchPage(videoId);
final String playerJsUrl = YoutubeJavaScriptExtractor.cleanJavaScriptUrl(url);

try {
// Assert that the URL we extracted and built is valid
new URL(playerJsUrl);
} catch (final MalformedURLException exception) {
// No further fallback is attempted here, so the malformed URL is reported to the caller
throw new ParsingException(
"The extracted and built JavaScript URL is invalid", exception);
}

return YoutubeJavaScriptExtractor.downloadJavaScriptCode(playerJsUrl);
}
}

@Nonnull
Expand Down Expand Up @@ -134,7 +126,7 @@ static String extractJavaScriptUrlWithEmbedWatchPage(@Nonnull final String video
}
}

// Use regexes to match the URL in a JavaScript embedded script of the HTML page
// Use regexes to match the URL in an embedded script of the HTML page
try {
return Parser.matchGroup1(
EMBEDDED_WATCH_PAGE_JS_BASE_PLAYER_URL_PATTERN, embedPageContent);
Expand All @@ -145,29 +137,28 @@ static String extractJavaScriptUrlWithEmbedWatchPage(@Nonnull final String video
}

/**
* Completes a protocol-relative or domain-relative JavaScript player URL.
*
* <p>
* A URL starting with {@code //} gets the {@code https:} scheme prepended, a URL starting with a
* single {@code /} gets {@code https://www.youtube.com} prepended, and any other value is
* returned unchanged.
* </p>
*
* <p>
* NOTE(review): this span is rendered from a diff and shows the signature and the statements
* twice, once with the parameter named {@code playerJsUrl} and once with it named
* {@code javaScriptPlayerUrl}; the logic of both variants is the same.
* </p>
*
* @param javaScriptPlayerUrl the URL to complete, as extracted from YouTube
* @return the completed URL, or the given URL if it starts with neither {@code //} nor {@code /}
*/
@Nonnull
private static String cleanJavaScriptUrl(@Nonnull final String playerJsUrl) {
if (playerJsUrl.startsWith("//")) {
private static String cleanJavaScriptUrl(@Nonnull final String javaScriptPlayerUrl) {
if (javaScriptPlayerUrl.startsWith("//")) {
// https part has to be added manually if the URL is protocol-relative
return HTTPS + playerJsUrl;
} else if (playerJsUrl.startsWith("/")) {
return HTTPS + javaScriptPlayerUrl;
} else if (javaScriptPlayerUrl.startsWith("/")) {
// https://www.youtube.com part has to be added manually if the URL is relative to
// YouTube's domain
return HTTPS + "//www.youtube.com" + playerJsUrl;
return HTTPS + "//www.youtube.com" + javaScriptPlayerUrl;
} else {
return playerJsUrl;
return javaScriptPlayerUrl;
}
}

/**
* Downloads the JavaScript base player file from the given URL.
*
* <p>
* The request is made with the downloader returned by {@code NewPipe.getDownloader()}, using
* {@code Localization.DEFAULT}, and the body of the response is returned as is.
* </p>
*
* <p>
* NOTE(review): this span is rendered from a diff and shows two variants of the signature, of
* the request and of the error message (one including the URL, one without it) - confirm which
* one is current against the repository.
* </p>
*
* @param javaScriptPlayerUrl the URL of the JavaScript base player file
* @return the body of the response
* @throws ParsingException if any exception is thrown while getting the file; the original
* exception is kept as the cause
*/
@Nonnull
private static String downloadJavaScriptCode(@Nonnull final String playerJsUrl)
private static String downloadJavaScriptCode(@Nonnull final String javaScriptPlayerUrl)
throws ParsingException {
try {
return NewPipe.getDownloader()
.get(playerJsUrl, Localization.DEFAULT)
.get(javaScriptPlayerUrl, Localization.DEFAULT)
.responseBody();
} catch (final Exception e) {
throw new ParsingException(
"Could not get JavaScript base player's code from URL: " + playerJsUrl, e);
throw new ParsingException("Could not get JavaScript base player's code", e);
}
}
}
Loading

0 comments on commit 289db11

Please sign in to comment.