How can I programmatically get the subtitles of a playing YouTube video?
Initially I tried to do it offline via the YouTube API, but it seems YouTube forbids fetching the subtitles of videos you do not own.
Now I'm trying to do it online. I haven't found any YouTube Player API methods for captions. I've also tried to get the YouTube captions as a TextTrack with the videojs player, the way it can be done for regular videos, but the following doesn't work:
<html>
<head>
<link href="//vjs.zencdn.net/4.12/video-js.css" rel="stylesheet">
<script src="https://ajax.googleapis.com/ajax/libs/jquery/1.11.3/jquery.min.js"></script>
<script type="text/javascript" src="//vjs.zencdn.net/4.12/video.js"></script>
<script type="text/javascript" src="../lib/youtube.js"></script>
</head>
<body>
<video id="myvideo"
class="video-js vjs-default-skin vjs-big-play-centered"
controls
preload="auto"
width="640"
height="360">
</video>
<script type="text/javascript">
// Create a video.js player on the <video id="myvideo"> element, asking it to
// use the "youtube" tech for the given YouTube watch URL.
var myvideo = videojs(
"myvideo",
{
"techOrder": ["youtube"],
"src": "https://www.youtube.com/watch?v=jNhtbmXzIaM"
},
// Third argument to videojs(): a callback in which `this` is used as the
// player (presumably invoked once the player is ready — confirm against the
// video.js version in use).
function() {
// Logs how many text tracks the player exposes; the author observes 0 here.
console.log('Tracks: ' + this.textTracks().length); //zero here :(
// Disabled experiment: take the first text track, wait for its 'loaded'
// event, then dump its cues. It cannot work while the track list is empty.
/*var aTextTrack = this.textTracks()[0];
aTextTrack.on('loaded', function() {
console.log('here it is');
cues = aTextTrack.cues();
console.log('Ready State', aTextTrack.readyState())
console.log('Cues', cues);
});
aTextTrack.show();*/
});
</script>
</body>
</html>
I've also tried an ugly solution with parsing YouTube Player IFrame (there is a div inside it with current subtitles' line), but it doesn't work because of origin mismatch security issues.
Is there any way my goal can be achieved in java (for offline solutions) or javascript (for online solutions)?
Building on Sergiu Mare's suggestion, I wrote an encapsulated function that can return the captions in the console.
This is written in pure JavaScript (ES6) and you can test it out below, or you can copy everything below and paste it in the console of any video that has captions.
/**
 * Entry point: resolves the video id (page URL first, fallback second),
 * downloads its captions and prints them to the console as delimited text.
 */
const main = async () => {
  // Queen – Bohemian Rhapsody; used when the page URL yields no video id.
  const fallbackVideoId = 'fJ9rUzIMcZQ';
  const targetVideoId = YouTubeCaptionUtil.videoId() || fallbackVideoId;
  const captions = await YouTubeCaptionUtil.fetchCaptions(targetVideoId);
  console.log(CsvUtil.fromJson(captions));
};
/**
 * Static helpers for downloading a YouTube caption track (requested with
 * `fmt=json3`) and flattening it into `{ start, dur, text }` rows.
 */
class YouTubeCaptionUtil {
  /**
   * Downloads and parses the caption track of a video.
   *
   * @param {string} videoId - YouTube video id.
   * @param {{baseUrl?: string, languageId?: string}} [options] - Overrides for
   *   `YouTubeCaptionUtil.defaultOptions`.
   * @returns {Promise<Array<{start: string, dur: string, text: string}>>}
   *   One row per caption event.
   * @throws {Error} When the server answers with a non-2xx status.
   */
  static async fetchCaptions(videoId, options) {
    const opts = { ...YouTubeCaptionUtil.defaultOptions, ...options };
    const response = await fetch(YouTubeCaptionUtil.__requestUrl(videoId, opts));
    if (!response.ok) {
      throw new Error(`Caption request failed with HTTP status ${response.status}`);
    }
    return YouTubeCaptionUtil.__parseTranscript(await response.json());
  }

  /**
   * Reads the `v` query parameter of the current page.
   *
   * @returns {string|null} The video id, or null when the URL has none.
   */
  static videoId() {
    // URLSearchParams finds `v` wherever it sits in the query string; the
    // previous split-based lookup returned null unless an `&` followed the id.
    const id = new URLSearchParams(window.location.search).get('v');
    return id || null;
  }

  /**
   * Builds the caption request URL.
   *
   * @param {string} videoId - YouTube video id.
   * @param {{baseUrl: string, languageId: string}} opts - Endpoint and language.
   * @returns {string} URL asking for the `json3` format.
   */
  static __requestUrl(videoId, { baseUrl, languageId }) {
    const lang = encodeURIComponent(languageId);
    const id = encodeURIComponent(videoId);
    return `${baseUrl}?lang=${lang}&v=${id}&fmt=json3`;
  }

  /**
   * Converts a json3 transcript into caption rows.
   *
   * @param {{events?: Array<object>}} transcript - Parsed response body.
   * @returns {Array<{start: string, dur: string, text: string}>}
   */
  static __parseTranscript({ events }) {
    return (events || [])
      // Events without text segments carry no caption text; skip them
      // instead of failing on the destructuring.
      .filter(({ segs }) => Array.isArray(segs) && segs.length > 0)
      .map(({ tStartMs = 0, dDurationMs = 0, segs }) => ({
        // The *Ms fields are milliseconds; __formatTime expects seconds.
        start: YouTubeCaptionUtil.__formatTime(tStartMs / 1000),
        dur: YouTubeCaptionUtil.__formatTime(dDurationMs / 1000),
        // An event may be split into several segments; keep all of them.
        text: segs.map(({ utf8 = '' }) => utf8).join(''),
      }));
  }

  /**
   * Formats a number of seconds as `HH:MM:SS` (wraps after 24 hours).
   *
   * @param {number} seconds - Whole or fractional seconds; fractions are dropped.
   * @returns {string} Zero-padded clock string.
   */
  static __formatTime(seconds) {
    // Built from a UTC timestamp so the result does not depend on the local
    // time zone.
    return new Date(Math.trunc(seconds) * 1000).toISOString().slice(11, 19);
  }
}

/** Defaults merged under the `options` argument of `fetchCaptions`. */
YouTubeCaptionUtil.defaultOptions = {
  baseUrl: 'https://video.google.com/timedtext',
  languageId: 'en'
};
/** Turns an array of flat records into delimiter-separated text. */
class CsvUtil {
  /**
   * Serialises records line by line. Columns are taken from the keys of the
   * first record, minus `ignoreKeys`. Values are joined as-is (no quoting).
   *
   * @param {Array<object>} json - Records to serialise.
   * @param {{includeHeader?: boolean, ignoreKeys?: string[], delimiter?: string}} [options]
   *   Overrides for `CsvUtil.defaultOptions`.
   * @returns {string} Lines joined with `\n`; an empty string when there are
   *   no records.
   */
  static fromJson(json, options) {
    const opts = { ...CsvUtil.defaultOptions, ...options };
    // Without a first record there are no columns to derive; previously this
    // crashed on `Object.keys(undefined)`.
    if (json == null || json.length === 0) {
      return '';
    }
    const keys = Object.keys(json[0]).filter((key) => !opts.ignoreKeys.includes(key));
    const header = opts.includeHeader ? [keys.join(opts.delimiter)] : [];
    const rows = json.map((entry) => keys.map((key) => entry[key]).join(opts.delimiter));
    return [...header, ...rows].join('\n');
  }
}

/** Defaults merged under the `options` argument of `fromJson`. */
CsvUtil.defaultOptions = {
  includeHeader: false,
  ignoreKeys: ['dur'],
  delimiter: '\t'
};
// Report a failed caption download instead of leaving the rejection unhandled.
main().catch((error) => {
  console.error(error);
});
.as-console-wrapper { top: 0; max-height: 100% !important; }
/**
 * Entry point: loads the subtitles of the current page's video (or of the
 * fallback video) and prints them as tab-separated lines without the `dur`
 * column.
 */
const main = async () => {
  const csvOptions = {
    includeHeader: false,
    ignoreKeys: ['dur'],
    delimiter: '\t',
  };
  // Queen – Bohemian Rhapsody; used when the page URL yields no video id.
  const fallbackId = 'fJ9rUzIMcZQ';
  const videoId = getYouTubeVideoId() || fallbackId;
  const subtitles = await loadYouTubeSubtitles(videoId);
  console.log(jsonToCsv(subtitles, csvOptions));
};
/**
 * Converts a json3 transcript into caption rows.
 *
 * @param {{events?: Array<object>}} transcript - Parsed response body.
 * @returns {Array<{start: string, dur: string, text: string}>} One row per
 *   event that carries text.
 */
const parseTranscript = ({ events }) => {
  return (events || [])
    // Events without text segments carry no caption text; skip them instead
    // of failing on the destructuring.
    .filter(({ segs }) => Array.isArray(segs) && segs.length > 0)
    .map(({ tStartMs = 0, dDurationMs = 0, segs }) => ({
      // The *Ms fields are milliseconds; formatTime expects seconds.
      start: formatTime(tStartMs / 1000),
      dur: formatTime(dDurationMs / 1000),
      // An event may be split into several segments; keep all of them.
      text: segs.map(({ utf8 = '' }) => utf8).join(''),
    }));
};
/**
 * Formats a number of seconds as a zero-padded `HH:MM:SS` string.
 *
 * @param {number} seconds - Seconds to add to the epoch.
 * @returns {string} Characters 11-18 of the resulting ISO timestamp.
 */
const formatTime = (seconds) => {
  const clock = new Date(null); // the epoch
  clock.setSeconds(seconds);
  const isoStamp = clock.toISOString();
  return isoStamp.slice(11, 19);
};
/**
 * Reads the `v` query parameter of the current page.
 *
 * @returns {string|null} The video id, or null when the URL has none.
 */
const getYouTubeVideoId = () => {
  // URLSearchParams finds `v` wherever it sits in the query string; the
  // previous split-based lookup returned null unless an `&` followed the id.
  const id = new URLSearchParams(window.location.search).get('v');
  return id || null;
};
/**
 * Downloads the caption track of a video in `json3` format and parses it.
 *
 * @param {string} videoId - YouTube video id.
 * @param {{baseUrl?: string, languageId?: string}} [options] - Endpoint and
 *   language overrides.
 * @returns {Promise<*>} Whatever `parseTranscript` returns for the body.
 * @throws {Error} When the server answers with a non-2xx status.
 */
const loadYouTubeSubtitles = async (videoId, options) => {
  const settings = {
    baseUrl: 'https://video.google.com/timedtext',
    languageId: 'en',
    ...options,
  };
  const lang = encodeURIComponent(settings.languageId);
  const id = encodeURIComponent(videoId);
  const requestUrl = `${settings.baseUrl}?lang=${lang}&v=${id}&fmt=json3`;
  const response = await fetch(requestUrl);
  // Fail with the status code rather than with a JSON parse error on an
  // error page.
  if (!response.ok) {
    throw new Error(`Caption request failed with HTTP status ${response.status}`);
  }
  return parseTranscript(await response.json());
};
/**
 * Serialises an array of flat records as delimiter-separated lines. Columns
 * are taken from the keys of the first record, minus `ignoreKeys`. Values are
 * joined as-is (no quoting).
 *
 * @param {Array<object>} json - Records to serialise.
 * @param {{includeHeader?: boolean, delimiter?: string, ignoreKeys?: string[]}} [options]
 *   Defaults: header included, `,` delimiter, no ignored keys.
 * @returns {string} Lines joined with `\n`; an empty string when there are no
 *   records.
 */
const jsonToCsv = (json, options) => {
  const settings = {
    includeHeader: true,
    delimiter: ',',
    ignoreKeys: [],
    ...options,
  };
  // Without a first record there are no columns to derive; previously this
  // crashed on `Object.keys(undefined)`.
  if (json == null || json.length === 0) {
    return '';
  }
  const keys = Object.keys(json[0]).filter((key) => !settings.ignoreKeys.includes(key));
  const header = settings.includeHeader ? [keys.join(settings.delimiter)] : [];
  const rows = json.map((entry) => keys.map((key) => entry[key]).join(settings.delimiter));
  return [...header, ...rows].join('\n');
};
// Report a failed subtitle download instead of leaving the rejection unhandled.
main().catch((error) => {
  console.error(error);
});
.as-console-wrapper { top: 0; max-height: 100% !important; }
This response creates an XMLHttpRequest.
// Start the download for the current page's video, falling back to
// Queen – Bohemian Rhapsody when the URL carries no video id, and print the
// result as tab-separated lines without the `dur` column.
loadYouTubeSubtitles(getYouTubeVideoId() || 'fJ9rUzIMcZQ', {
  callbackFn: (subtitles) => {
    const asText = jsonToCsv(subtitles, {
      includeHeader: false,
      ignoreKeys: ['dur'],
      delimiter: '\t',
    });
    console.log(asText);
  },
});
/**
 * Reads the `v` query parameter of the current page.
 *
 * @returns {string|null} The video id, or null when the URL has none.
 */
function getYouTubeVideoId() {
  // URLSearchParams finds `v` wherever it sits in the query string; the
  // previous split-based lookup returned null unless an `&` followed the id.
  const id = new URLSearchParams(window.location.search).get('v');
  return id || null;
}
/**
 * Requests the caption track of a video as an XML document via
 * XMLHttpRequest and passes the parsed rows to `options.callbackFn`.
 * Failures are only logged with console.log; the callback is not invoked.
 *
 * @param {string} videoId - YouTube video id.
 * @param {{baseUrl?: string, languageId?: string, callbackFn?: function(Array<{start: string, dur: string, text: string}>): void}} [options]
 *   Endpoint, language and result handler; the default handler logs the rows.
 * @returns {void}
 */
function loadYouTubeSubtitles(videoId, options) {
  const settings = Object.assign({
    baseUrl: 'https://video.google.com/timedtext',
    languageId: 'en',
    callbackFn: function(json) { console.log(json); }, // Default
  }, options || {});

  // https://stackoverflow.com/a/9609450/1762224
  // Strips markup with the two regexes, then lets a detached <div> resolve
  // the remaining character entities. The element is never attached to the
  // page, so it needs no removal afterwards.
  const decodeHTML = (function() {
    const el = document.createElement('div');
    return function decode(str) {
      if (str && typeof str === 'string') {
        str = str.replace(/<script[^>]*>([\S\s]*?)<\/script>/gmi, '')
          .replace(/<\/?\w(?:[^"'>]|"[^"]*"|'[^']*')*>/gmi, '');
        el.innerHTML = str;
        str = el.textContent;
        el.textContent = '';
      }
      return str;
    };
  })();

  // Formats whole seconds as HH:MM:SS, independent of the local time zone.
  function formatTime(seconds) {
    return new Date(Math.trunc(seconds) * 1000).toISOString().slice(11, 19);
  }

  // Maps every <text> element under <transcript> to a caption row.
  function parseTranscriptAsJSON(xml) {
    return Array.from(xml.querySelectorAll('transcript text'), (node) => ({
      start: formatTime(Math.floor(Number(node.getAttribute('start')))),
      dur: formatTime(Math.floor(Number(node.getAttribute('dur')))),
      text: decodeHTML(node.textContent).replace(/\s+/g, ' '),
    }));
  }

  const lang = encodeURIComponent(settings.languageId);
  const id = encodeURIComponent(videoId);
  const xhr = new XMLHttpRequest();
  // NOTE(review): this read-only lookup is sent as POST — confirm the
  // endpoint does not expect GET.
  xhr.open('POST', `${settings.baseUrl}?lang=${lang}&v=${id}`, true);
  xhr.responseType = 'document';
  xhr.onload = function() {
    if (xhr.status < 200 || xhr.status >= 400) {
      console.log(`Error: ${xhr.status}`);
      return;
    }
    // With responseType 'document' the response is null when the body is
    // empty or cannot be parsed; previously that threw inside this handler.
    if (!xhr.response) {
      console.log('Error: empty or unparsable caption response');
      return;
    }
    settings.callbackFn(parseTranscriptAsJSON(xhr.response));
  };
  xhr.onerror = function() {
    console.log('Error!');
  };
  xhr.send();
}
/**
 * Serialises an array of flat records as delimiter-separated lines. Columns
 * are taken from the keys of the first record, minus `ignoreKeys`. Values are
 * joined as-is (no quoting).
 *
 * @param {Array<object>} json - Records to serialise.
 * @param {{includeHeader?: boolean, delimiter?: string, ignoreKeys?: string[]}} [options]
 *   Defaults: header included, `,` delimiter, no ignored keys.
 * @returns {string} Lines joined with `\n`; an empty string when there are no
 *   records.
 */
function jsonToCsv(json, options) {
  const settings = Object.assign({
    includeHeader: true,
    delimiter: ',',
    ignoreKeys: [],
  }, options || {});
  // Without a first record there are no columns to derive; previously this
  // crashed on `Object.keys(undefined)`.
  if (json == null || json.length === 0) {
    return '';
  }
  const keys = Object.keys(json[0]).filter((key) => !settings.ignoreKeys.includes(key));
  const header = settings.includeHeader ? [keys.join(settings.delimiter)] : [];
  const rows = json.map((entry) => keys.map((key) => entry[key]).join(settings.delimiter));
  return header.concat(rows).join('\n');
}
.as-console-wrapper { top: 0; max-height: 100% !important; }
If you love us, you can donate to us via PayPal or buy us a coffee so we can maintain and grow. Thank you!
Donate Us With