Example 1: a video player with clickable transcript on the side

Example 1: a video player with clickable transcript – reading WebVTT file content at once

A few words about the set of five examples presented in this chapter: the code of the examples is larger than usual, but each example integrates blocks of code already presented and detailed in the previous lessons.

Creating an accessible player with a clickable transcript of the video presentation

It might be interesting to read the content of a track before playing the video. This is what the edX video player does: it reads a single subtitle file and displays it as a transcript on the right. In the transcript, you can click on a sentence to make the video jump to the corresponding location. We will learn how to do this using the track API.

edX video player with clickable transcript on the right

Example 1: read the WebVTT file at once using the track API and make a clickable transcript

Here we decided to code something similar, except that we offer a choice of track/subtitle language. Our example offers English or German subtitles, plus another track that contains the chapter descriptions (more on that later). When a button is used to select a language (track), the appropriate transcript is displayed on the right. As with the edX player, we can click on any sentence to make the video jump to the corresponding location. While the video is playing, the current text is highlighted.

Some important things here:

  1. Browsers do not load all the tracks at the same time, and the way they decide when and which track to load differs from one browser to another. So, when we click on a button to choose the track to display, we need to enforce the loading of the track, if it has not been loaded yet.
  2. When a track file is loaded, we iterate over its cues and generate the transcript as a set of <li>…</li> elements: one <li> per cue/sentence.
  3. We define the id attribute of the <li> to be the same as the cue.id value. In this way, when we click on a <li> we can get its id and find the corresponding cue start time, and make the video jump to that time location.
  4. We add an enter and an exit listener to each cue. These are useful for highlighting the current cue. Note that these listeners are not yet supported by Firefox (you can use a cuechange event listener on a TextTrack instead – the source code for Firefox is commented out in the example).

Try this example at JSBin:

video player with clickable transcript

HTML code:

  <section id="all">
    <!-- The buttons stay disabled until the page's load handler enables them. -->
    <p>
      <button disabled id="buttonEnglish"
              onclick="loadTranscript('en');">
        Display English transcript
      </button>
      <button disabled id="buttonDeutsch"
              onclick="loadTranscript('de');">
        Display Deutsch transcript
      </button>
    </p>
    <video id="myVideo" preload="metadata" controls crossOrigin="anonymous">
      <source src="https://…/elephants-dream-medium.mp4"
              type="video/mp4">
      <source src="https://…/elephants-dream-medium.webm"
              type="video/webm">
      <track label="English subtitles"
             kind="subtitles"
             srclang="en"
             src="https://…/elephants-dream-subtitles-en.vtt">
      <track label="Deutsch subtitles"
             kind="subtitles"
             srclang="de"
             src="https://…/elephants-dream-subtitles-de.vtt"
             default>
      <track label="English chapters"
             kind="chapters"
             srclang="en"
             src="https://…/elephants-dream-chapters-en.vtt">
    </video>
    <!-- Target of document.querySelector("#transcript") and of the #transcript
         CSS rule: the script fills it with one <li> per cue. -->
    <ul id="transcript"></ul>
  </section>

CSS code:

  /* Box that wraps the buttons, the video and the transcript */
  #all {
     background-color: lightgrey;
     border-radius: 10px;
     padding: 20px;
     border: 1px solid;
     display: inline-block;
     margin: 30px;
     width: 90%;
  }

  /* One transcript sentence; it is clickable, so show a pointer cursor */
  .cues {
     color: blue;
     cursor: pointer;
  }

  .cues:hover {
     text-decoration: underline;
  }

  /* Sentence of the cue that is currently active in the video */
  .cues.current {
     color: black;
     font-weight: bold;
  }

  /* Video on the left (two thirds of the width) */
  #myVideo {
     display: block;
     float: left;
     margin-right: 3%;
     width: 66%;
     background-color: black;
     position: relative;
  }

  /* Scrollable transcript on the right */
  #transcript {
     padding: 10px;
     border: 1px solid;
     float: left;
     max-height: 225px;
     overflow: auto;
     width: 25%;
     margin: 0;
     font-size: 14px;
     list-style: none;
  }

JavaScript code:

  // DOM nodes shared by all the functions of this example.
  var video, transcriptDiv;
  // TextTrack objects, <track> HTML elements, URLs of the .vtt files.
  var tracks, trackElems, tracksURLs = [];
  var buttonEnglish, buttonDeutsch;

  // Initialisation: runs once the page is loaded, collects the DOM nodes
  // the other functions work with, then enables the language buttons.
  window.onload = function() {
    console.log("init");
    video = document.querySelector("#myVideo");
    transcriptDiv = document.querySelector("#transcript");
    // The tracks as HTML elements. Only the <track> children of our video
    // are selected, so that trackElems[i] and video.textTracks[i] describe
    // the same track: loadTranscript() walks both lists with one index.
    trackElems = video.querySelectorAll("track");
    // Get the URLs of the vtt files
    for (var i = 0; i < trackElems.length; i++) {
      tracksURLs[i] = trackElems[i].src;
    }
    buttonEnglish = document.querySelector("#buttonEnglish");
    buttonDeutsch = document.querySelector("#buttonDeutsch");
    // We enable the buttons only in this load callback:
    // they cannot be clicked before the video is in the DOM.
    buttonEnglish.disabled = false;
    buttonDeutsch.disabled = false;
    // The tracks as TextTrack JS objects
    tracks = video.textTracks;
  };
  28.  
  /**
   * Called when a language button is clicked: clears the current transcript,
   * disables every track, then shows the subtitle/caption track(s) whose
   * language is `lang` and displays their cues as a transcript.
   *
   * @param {string} lang - value compared with track.language ("en", "de"...)
   */
  function loadTranscript(lang) {
    // Value of HTMLTrackElement.readyState once the track file is loaded.
    var TRACK_LOADED = 2;

    clearTranscriptDiv();
    // Set all track modes to disabled. We will only activate the
    // one whose content will be displayed as transcript.
    disableAllTracks();

    for (var i = 0; i < tracks.length; i++) {
      var track = tracks[i];
      // <track> element at the same index as the TextTrack object.
      var trackAsHtmlElem = trackElems[i];

      // Only subtitles/captions are ok for this example...
      if (track.language !== lang || track.kind === "chapters") {
        continue;
      }

      // A track must not be "disabled" to load its cues and fire events.
      track.mode = "showing";

      if (trackAsHtmlElem.readyState === TRACK_LOADED) {
        // the track has already been loaded
        displayCues(track);
      } else {
        displayCuesAfterTrackLoaded(trackAsHtmlElem, track);
      }

      /* Fallback for Firefox that still does not implement cue enter and exit events
         track.addEventListener("cuechange", function(e) {
             var cue = this.activeCues[0];
             if (!cue) { return; } // no cue is active between two sentences
             console.log("cue change");
             var transcriptText = document.getElementById(cue.id);
             transcriptText.classList.add("current");
         });
      */
    }
  }
  /**
   * Displays the transcript of a track that is not loaded yet: registers a
   * "load" listener on the <track> element and calls displayCues(track)
   * when it fires. The transcript cannot be built before the track file
   * has been loaded.
   *
   * @param {HTMLTrackElement} trackElem - the <track> element being loaded
   * @param {TextTrack} track - the TextTrack object passed to displayCues
   */
  function displayCuesAfterTrackLoaded(trackElem, track) {
    trackElem.addEventListener('load', function(e) {
      console.log("track loaded");
      displayCues(track);
    });
  }
  /**
   * Sets the mode of every text track of the video to "disabled".
   * The track mode is important: disabled tracks do not fire events.
   */
  function disableAllTracks() {
    for (var i = 0; i < tracks.length; i++) {
      tracks[i].mode = "disabled";
    }
  }
  76.  
  /**
   * Displays the transcript of a TextTrack: one clickable <li> per cue.
   *
   * Each <li> gets the cue id as its id attribute (addCueListeners looks the
   * element up by that id to highlight it), and an onclick handler that makes
   * the video jump to the start time of the cue.
   *
   * @param {TextTrack} track - track whose cues are displayed
   */
  function displayCues(track) {
    var cues = track.cues;
    // track.cues is null while the track is disabled: nothing to display.
    if (!cues) {
      return;
    }

    var html = "";
    for (var i = 0, len = cues.length; i < len; i++) {
      // current cue, also add enter and exit listeners to it
      var cue = cues[i];
      addCueListeners(cue);

      // Test if the cue content is a voice <v speaker>...</v>
      var voices = getVoices(cue.text);
      var transText;
      if (voices.length > 0) {
        var sentences = [];
        for (var j = 0; j < voices.length; j++) {
          sentences.push(voices[j].voice + ': ' + removeHTML(voices[j].text));
        }
        // Separate the speakers, otherwise their sentences run together.
        transText = sentences.join(' ');
      } else {
        // Not a voice text: strip the markup like for voices.
        transText = removeHTML(cue.text);
      }

      // The attribute values are quoted so that any cue id is safe to use.
      html += '<li class="cues" id="' + cue.id + '"'
            + ' onclick="jumpTo(' + cue.startTime + ');">'
            + transText + '</li>';
    }
    // Single insertion: avoids rebuilding the transcript once per cue.
    addToTranscriptDiv(html);
  }
  103.  
  /**
   * Extracts the voice spans of a cue text. Voices are like
   * <v Michel>some text</v>.
   *
   * @param {string} speech - text content of a cue
   * @returns {Array<{voice: string, text: string}>} one entry per voice span,
   *          in order of appearance; empty when the text contains no voice
   */
  function getVoices(speech) {
    var voices = [];
    var pos = speech.indexOf('<v');
    while (pos !== -1) {
      // Search the end of the tag from the tag itself: a '>' located
      // before the voice tag must not be mistaken for its end.
      var endVoice = speech.indexOf('>', pos);
      if (endVoice === -1) {
        break; // unterminated tag: nothing more to extract
      }
      var voice = speech.substring(pos + 2, endVoice).trim();
      var endSpeech = speech.indexOf('</v>', endVoice);
      var text;
      if (endSpeech === -1) {
        // No closing </v>: the voice spans the rest of the text.
        text = speech.substring(endVoice + 1);
        speech = '';
      } else {
        text = speech.substring(endVoice + 1, endSpeech);
        speech = speech.substring(endSpeech + 4);
      }
      voices.push({
        'voice': voice,
        'text': text
      });
      pos = speech.indexOf('<v');
    }
    return voices;
  }
  122.  
  /**
   * Returns the text content of an HTML fragment, i.e. the fragment without
   * its tags. The fragment is parsed in a <div> that is never attached to
   * the page.
   *
   * @param {string} text - HTML fragment
   * @returns {string} the text without markup ('' when there is no text)
   */
  function removeHTML(text) {
    var div = document.createElement('div');
    div.innerHTML = text;
    return div.textContent || div.innerText || '';
  }
  /**
   * Makes the video jump to the given time position and forces playback
   * if the video was not playing.
   *
   * @param {number} time - position in seconds
   */
  function jumpTo(time) {
    video.currentTime = time;
    // play() returns a promise in current browsers and nothing in older
    // ones. The promise is rejected when playback cannot start: handle it
    // instead of leaving an unhandled rejection.
    var playback = video.play();
    if (playback && typeof playback.catch === 'function') {
      playback.catch(function(err) {
        console.log('playback could not start: ' + err);
      });
    }
  }
  134.  
  /**
   * Removes the transcript currently displayed.
   */
  function clearTranscriptDiv() {
    transcriptDiv.innerHTML = "";
  }
  138.  
  /**
   * Appends HTML at the end of the transcript.
   *
   * insertAdjacentHTML only parses the new fragment, whereas
   * `innerHTML += ...` serialises and re-parses everything that is already
   * in the transcript at each call.
   *
   * @param {string} htmlText - HTML to append
   */
  function addToTranscriptDiv(htmlText) {
    transcriptDiv.insertAdjacentHTML('beforeend', htmlText);
  }
  142.  
  /**
   * Adds enter and exit listeners to a cue. They highlight the transcript
   * sentence of the cue while the cue is active, by adding/removing the
   * "current" CSS class on the element whose id is the cue id.
   *
   * @param {TextTrackCue} cue - cue that gets the onenter/onexit handlers
   */
  function addCueListeners(cue) {
    cue.onenter = function() {
      console.log('enter id=' + this.id);
      var transcriptText = document.getElementById(this.id);
      // No element when the cue has no id or its sentence is not displayed.
      if (transcriptText) {
        transcriptText.classList.add("current");
      }
    };

    cue.onexit = function() {
      console.log('exit id=' + this.id);
      var transcriptText = document.getElementById(this.id);
      if (transcriptText) {
        transcriptText.classList.remove("current");
      }
    };
  } // end of addCueListeners...

Example 2: load a WebVTT file using Ajax/XHR2 and parse it manually

This is an old example written in 2012, at a time when the track API was not supported by browsers. It downloads WebVTT files using Ajax and parses them “by hand”. Notice the complexity of the code compared to Example 1, which uses the track API instead. We give this example as is. Sometimes bypassing all APIs can be a valuable solution, especially when support for the track API is sporadic, as was the case in 2012…

Here is an example at JSBin that displays the values of the cues in the different tracks:

screenshot of JSBin example: video on top and two buttons "English" and "German" at the bottom for extracting the track contents in English or German

This example, adapted from an example from (now offline) dev.opera.com, uses some JavaScript code that takes a WebVTT subtitle (or caption) file as an argument, parses it, and displays the text on screen, in an element with an id of transcript.

Extract from HTML code:

  <video preload="metadata" controls>
    <source src="https://…/elephants-dream-medium.mp4" type="video/mp4">
    <source src="https://…/elephants-dream-medium.webm" type="video/webm">
    <track label="English subtitles" kind="subtitles" srclang="en"
           src="https://…/elephants-dream-subtitles-en.vtt" default>
    <track label="Deutsch subtitles" kind="subtitles" srclang="de"
           src="https://…/elephants-dream-subtitles-de.vtt">
    <track label="English chapters" kind="chapters" srclang="en"
           src="https://…/elephants-dream-chapters-en.vtt">
  </video>
  <!-- … (markup omitted from this extract; the JavaScript code writes the
       transcript into the element whose id is "transcript") -->
    <h3>Video Transcript</h3>
    <button onclick="loadTranscript('en');">English</button>
    <button onclick="loadTranscript('de');">Deutsch</button>
  </div>

JavaScript code:

  1. // Transcript.js, by dev.opera.com
  /**
   * Builds the URL of the subtitle file for a language, then downloads the
   * file with Ajax and extracts its subtitles/captions.
   *
   * @param {string} lang - language code used in the file name ("en", "de")
   */
  function loadTranscript(lang) {
    var url = "http://mainline.i3s.unice.fr/mooc/" +
        'elephants-dream-subtitles-' + lang + '.vtt';
    loadTranscriptFile(url);
  }
  8.  
  /**
   * Downloads a WebVTT file using Ajax/XHR2 (explained in detail in Week 3),
   * extracts the text of its cues and displays it in the element whose id
   * is "transcript".
   *
   * @param {string} webvttFileUrl - URL of the .vtt file
   */
  function loadTranscriptFile(webvttFileUrl) {
    var reqTrans = new XMLHttpRequest();
    reqTrans.open('GET', webvttFileUrl);
    // callback, called only once the response is ready
    reqTrans.onload = function(e) {
      // Do not display the body of an error page as a transcript.
      if (reqTrans.status >= 400) {
        console.log('could not load ' + webvttFileUrl +
                    ' (HTTP status ' + reqTrans.status + ')');
        return;
      }
      var oTrans = document.getElementById('transcript');
      // The transcript is written once, after the whole file is parsed.
      oTrans.innerHTML = extractTranscriptHtml(reqTrans.response);
    };
    reqTrans.send(); // send the Ajax request
  }

  /**
   * Turns the content of a WebVTT file into the HTML of a transcript: the
   * text of each cue, without markup, followed by <br />. Voices
   * <v Speaker>text</v> are displayed as "Speaker: text".
   *
   * @param {string} content - content of the WebVTT file
   * @returns {string} HTML of the transcript
   */
  function extractTranscriptHtml(content) {
    // Timing line "start --> end". Hours are optional in WebVTT timestamps,
    // and the cue identifier line before the timings is optional too, so
    // the cues are located by their timing line only.
    var timecodeLine = /^(?:[0-9]{2,}:)?[0-9]{2}:[0-9]{2}[,.][0-9]{3}[ \t]+-->[ \t]+(?:[0-9]{2,}:)?[0-9]{2}:[0-9]{2}[,.][0-9]{3}/;
    var lines = content.split(/\r?\n/); // Get an array of text lines
    var transcript = '';
    var i = 0;

    while (i < lines.length) {
      if (!timecodeLine.test(lines[i])) {
        i++;
        continue;
      }
      i++; // the lines after the timings are the text of the cue

      // The text may be multiline: it ends at the first empty line.
      var textLines = [];
      while (i < lines.length && lines[i] !== '') {
        textLines.push(lines[i]);
        i++;
      }
      var text = textLines.join('\n');

      var transText = '';
      var voices = getVoices(text);
      if (voices.length > 0) {
        // one line per voice
        for (var j = 0; j < voices.length; j++) {
          transText += voices[j].voice + ': '
                    + removeHTML(voices[j].text)
                    + '<br />';
        }
      } else {
        // not a voice text
        transText = removeHTML(text) + '<br />';
      }
      transcript += transText;
    }
    return transcript;
  }
  61.  
  /**
   * Takes a text content and extracts its voices. Voices are like
   * <v Michel>some text</v>.
   *
   * @param {string} speech - text of a cue
   * @returns {Array<{voice: string, text: string}>} one entry per voice,
   *          in order of appearance; empty when the text has no voice
   */
  function getVoices(speech) {
    var voices = [];
    var pos = speech.indexOf('<v');
    while (pos !== -1) {
      // Look for the end of the tag starting at the tag itself, so that a
      // '>' placed before the voice is not taken for the end of the tag.
      var endVoice = speech.indexOf('>', pos);
      if (endVoice === -1) {
        break; // unterminated tag: stop here
      }
      var voice = speech.substring(pos + 2, endVoice).trim();
      var endSpeech = speech.indexOf('</v>', endVoice);
      var text;
      if (endSpeech === -1) {
        // The closing </v> is missing: the voice runs to the end.
        text = speech.substring(endVoice + 1);
        speech = '';
      } else {
        text = speech.substring(endVoice + 1, endSpeech);
        speech = speech.substring(endSpeech + 4);
      }
      voices.push({
        'voice': voice,
        'text': text
      });
      pos = speech.indexOf('<v');
    }
    return voices;
  }
  79.  
  /**
   * Strips the markup of an HTML fragment: the fragment is parsed in a
   * detached <div> and the text content of that <div> is returned.
   *
   * @param {string} text - HTML fragment
   * @returns {string} the text without tags ('' when there is no text)
   */
  function removeHTML(text) {
    var div = document.createElement('div');
    div.innerHTML = text;
    return div.textContent || div.innerText || '';
  }

Leave a comment