/* eslint-disable */
/**
 * fMP4 remuxer
 */
import AAC from '../helper/aac';
import Event from '../events';
import { logger } from '../utils/logger';
import MP4 from '../remux/mp4-generator';
import { ErrorTypes, ErrorDetails } from '../errors';
import '../utils/polyfill';

class MP4Remuxer {
  constructor(observer, id, config) {
    this.observer = observer;
    this.id = id;
    this.config = config;
    this.ISGenerated = false;
    this.PES2MP4SCALEFACTOR = 4;
    this.PES_TIMESCALE = 90000;
    this.MP4_TIMESCALE = this.PES_TIMESCALE / this.PES2MP4SCALEFACTOR;
    this.nextAvcDts = 90300;
    this.H264_TIMEBASE = 3600;
  }

  get passthrough() {
    return false;
  }

  destroy() {
  }

  insertDiscontinuity() {
    this._initPTS = this._initDTS = undefined;
  }

  switchLevel() {
    this.ISGenerated = false;
  }

  pushVideo(level, sn, videoTrack, timeOffset, contiguous) {
    this.level = level;
    this.sn = sn;
    // generate Init Segment if needed
    if (!this.ISGenerated) {
      this.generateVideoIS(videoTrack, timeOffset);
    }
    if (this.ISGenerated) {
      // if (videoTrack.samples.length) {
      this.remuxVideo_2(videoTrack, timeOffset, contiguous);
      // }
    }
  }

  remuxVideo_2(track, timeOffset, contiguous, audioTrackLength) {
    var offset = 8,
      mdat, moof,
      inputSamples = track.samples,
      outputSamples = [];
    /* concatenate the video data and construct the mdat in place
       (need 8 more bytes to fill length and mdat type) */
    mdat = new Uint8Array(track.len + (4 * track.nbNalu) + 8);
    let view = new DataView(mdat.buffer);
    view.setUint32(0, mdat.byteLength);
    mdat.set(MP4.types.mdat, 4);
    var sampleDuration = 0;
    let ptsnorm, dtsnorm, lastDTS;
    for (let i = 0; i < inputSamples.length; i++) {
      let avcSample = inputSamples[i],
        mp4SampleLength = 0;
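      // Worked example (illustrative): a 4-byte NALU [0x65, 0x88, 0x80, 0x21]
      // is written into the mdat as [0x00, 0x00, 0x00, 0x04, 0x65, 0x88, 0x80, 0x21];
      // the loop below replaces Annex-B framing with a 32-bit big-endian size
      // field per NALU, which is the AVCC layout that fMP4 expects.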
      // convert NALU bitstream to MP4 format (prepend NALU with size field)
      while (avcSample.units.units.length) {
        let unit = avcSample.units.units.shift();
        view.setUint32(offset, unit.data.byteLength);
        offset += 4;
        mdat.set(unit.data, offset);
        offset += unit.data.byteLength;
        mp4SampleLength += 4 + unit.data.byteLength;
      }
      let pts = avcSample.pts - this._initPTS;
      let dts = avcSample.dts - this._initDTS;
      dts = Math.min(pts, dts);
      if (lastDTS !== undefined) {
        ptsnorm = this._PTSNormalize(pts, lastDTS);
        dtsnorm = this._PTSNormalize(dts, lastDTS);
        sampleDuration = dtsnorm - lastDTS;
        if (sampleDuration <= 0) {
          logger.log(`invalid sample duration at PTS/DTS: ${avcSample.pts}/${avcSample.dts}|dts norm: ${dtsnorm}|lastDTS: ${lastDTS}:${sampleDuration}`);
          sampleDuration = 1;
        }
      } else {
        var nextAvcDts = this.nextAvcDts, delta;
        ptsnorm = this._PTSNormalize(pts, nextAvcDts);
        dtsnorm = this._PTSNormalize(dts, nextAvcDts);
        if (nextAvcDts) {
          // note: delta is in 90 kHz PES ticks here (there is no /90 conversion to ms)
          delta = Math.round(dtsnorm - nextAvcDts);
          if (/*contiguous ||*/ Math.abs(delta) < 600) {
            if (delta) {
              if (delta > 1) {
                logger.log(`AVC:${delta} ticks hole between fragments detected, filling it`);
              } else if (delta < -1) {
                logger.log(`AVC:${(-delta)} ticks overlapping between fragments detected`);
              }
              dtsnorm = nextAvcDts;
              ptsnorm = Math.max(ptsnorm - delta, dtsnorm);
              logger.log(`Video/PTS/DTS adjusted: ${ptsnorm}/${dtsnorm},delta:${delta}`);
            }
          }
        }
        this.firstPTS = Math.max(0, ptsnorm);
        this.firstDTS = Math.max(0, dtsnorm);
        sampleDuration = 0.03;
      }
      outputSamples.push({
        size: mp4SampleLength,
        duration: this.H264_TIMEBASE,
        cts: 0,
        flags: {
          isLeading: 0,
          isDependedOn: 0,
          hasRedundancy: 0,
          degradPrio: 0,
          dependsOn: avcSample.key ? 2 : 1,
          isNonSync: avcSample.key ? 0 : 1
        }
      });
      lastDTS = dtsnorm;
    }
    var lastSampleDuration = 0;
    if (outputSamples.length >= 2) {
      lastSampleDuration = outputSamples[outputSamples.length - 2].duration;
      outputSamples[0].duration = lastSampleDuration;
    }
    this.nextAvcDts = dtsnorm + lastSampleDuration;
    let dropped = track.dropped;
    track.len = 0;
    track.nbNalu = 0;
    track.dropped = 0;
    if (outputSamples.length && navigator.userAgent.toLowerCase().indexOf('chrome') > -1) {
      let flags = outputSamples[0].flags;
      flags.dependsOn = 2;
      flags.isNonSync = 0;
    }
    track.samples = outputSamples;
    moof = MP4.moof(track.sequenceNumber++, dtsnorm, track);
    track.samples = [];
    let data = {
      id: this.id,
      level: this.level,
      sn: this.sn,
      data1: moof,
      data2: mdat,
      startPTS: ptsnorm,
      endPTS: ptsnorm,
      startDTS: dtsnorm,
      endDTS: dtsnorm,
      type: 'video',
      nb: outputSamples.length,
      dropped: dropped
    };
    this.observer.trigger(Event.FRAG_PARSING_DATA, data);
    return data;
  }

  generateVideoIS(videoTrack, timeOffset) {
    var observer = this.observer,
      videoSamples = videoTrack.samples,
      tracks = {},
      data = { id: this.id, level: this.level, sn: this.sn, tracks: tracks, unique: false },
      computePTSDTS = (this._initPTS === undefined),
      initPTS, initDTS;
    if (computePTSDTS) {
      initPTS = initDTS = Infinity;
    }
    if (videoTrack.sps && videoTrack.pps && videoSamples.length) {
      videoTrack.timescale = 90000; //this.MP4_TIMESCALE;
      tracks.video = {
        container: 'video/mp4',
        codec: videoTrack.codec,
        initSegment: MP4.initSegment([videoTrack]),
        metadata: {
          width: videoTrack.width,
          height: videoTrack.height
        }
      };
      if (computePTSDTS) {
        initPTS = Math.min(initPTS, videoSamples[0].pts - this.H264_TIMEBASE);
        initDTS = Math.min(initDTS, videoSamples[0].dts - this.H264_TIMEBASE);
      }
    }
    if (Object.keys(tracks).length) {
      observer.trigger(Event.FRAG_PARSING_INIT_SEGMENT, data);
      this.ISGenerated = true;
      if (computePTSDTS) {
        this._initPTS = initPTS;
        this._initDTS = initDTS;
      }
    } else {
      logger.error('generateVideoIS ERROR==> ' + ErrorTypes.MEDIA_ERROR);
    }
  }
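  // Note on timescales (illustrative arithmetic): PES timestamps tick at 90 kHz.
  // With PES2MP4SCALEFACTOR = 4, MP4_TIMESCALE = 22500 and remuxVideo() divides
  // PES values by 4 before writing them into the moof. generateVideoIS() above
  // instead keeps the track timescale at 90000 so that remuxVideo_2() can write
  // PES values through unscaled, using H264_TIMEBASE = 3600 ticks (40 ms, i.e.
  // an assumed 25 fps) as the constant per-sample duration.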
  remux(level, sn, audioTrack, videoTrack, id3Track, textTrack, timeOffset, contiguous) {
    this.level = level;
    this.sn = sn;
    // generate Init Segment if needed
    if (!this.ISGenerated) {
      this.generateIS(audioTrack, videoTrack, timeOffset);
    }
    if (this.ISGenerated) {
      // Purposefully remuxing audio before video, so that remuxVideo can use nextAacPts, which is
      // calculated in remuxAudio.
      //logger.log('nb AAC samples:' + audioTrack.samples.length);
      if (audioTrack.samples.length) {
        let audioData = this.remuxAudio(audioTrack, timeOffset, contiguous);
        //logger.log('nb AVC samples:' + videoTrack.samples.length);
        if (videoTrack.samples.length) {
          let audioTrackLength;
          if (audioData) {
            audioTrackLength = audioData.endPTS - audioData.startPTS;
          }
          this.remuxVideo(videoTrack, timeOffset, contiguous, audioTrackLength);
        }
      } else {
        let videoData;
        //logger.log('nb AVC samples:' + videoTrack.samples.length);
        if (videoTrack.samples.length) {
          videoData = this.remuxVideo(videoTrack, timeOffset, contiguous);
        }
        if (videoData && audioTrack.codec) {
          this.remuxEmptyAudio(audioTrack, timeOffset, contiguous, videoData);
        }
      }
    }
    //logger.log('nb ID3 samples:' + id3Track.samples.length);
    if (id3Track.samples.length) {
      this.remuxID3(id3Track, timeOffset);
    }
    //logger.log('nb text samples:' + textTrack.samples.length);
    if (textTrack.samples.length) {
      this.remuxText(textTrack, timeOffset);
    }
    // notify end of parsing
    this.observer.trigger(Event.FRAG_PARSED, { id: this.id, level: this.level, sn: this.sn });
  }

  generateIS(audioTrack, videoTrack, timeOffset) {
    var observer = this.observer,
      audioSamples = audioTrack.samples,
      videoSamples = videoTrack.samples,
      pesTimeScale = this.PES_TIMESCALE,
      tracks = {},
      data = { id: this.id, level: this.level, sn: this.sn, tracks: tracks, unique: false },
      computePTSDTS = (this._initPTS === undefined),
      initPTS, initDTS;
    if (computePTSDTS) {
      initPTS = initDTS = Infinity;
    }
    if (audioTrack.config && audioSamples.length) {
      audioTrack.timescale = audioTrack.audiosamplerate;
      // MP4 duration (track duration in seconds multiplied by timescale) is coded on 32 bits.
      // We know that each AAC frame contains 1024 audio samples.
      // In order to avoid overflowing the 32 bit counter for large durations, we use a smaller timescale (timescale/gcd);
      // we just need to ensure that the AAC sample duration will still be an integer (it will be 1024/gcd).
      if (audioTrack.timescale * audioTrack.duration > Math.pow(2, 32)) {
        let greatestCommonDivisor = function (a, b) {
          if (!b) {
            return a;
          }
          return greatestCommonDivisor(b, a % b);
        };
        audioTrack.timescale = audioTrack.audiosamplerate / greatestCommonDivisor(audioTrack.audiosamplerate, 1024);
      }
      logger.log('audio mp4 timescale :' + audioTrack.timescale);
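      // Worked example (illustrative): at 44100 Hz, timescale * duration would
      // overflow 2^32 after roughly 27 hours. gcd(44100, 1024) = 4, so the
      // timescale drops to 11025 and each AAC frame then lasts 1024 / 4 = 256
      // ticks, still an integer, as required above.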
      tracks.audio = {
        container: 'audio/mp4',
        codec: audioTrack.codec,
        initSegment: MP4.initSegment([audioTrack]),
        metadata: {
          channelCount: audioTrack.channelCount
        }
      };
      if (computePTSDTS) {
        // remember first PTS of this demuxing context. for audio, PTS + DTS ...
        initPTS = initDTS = audioSamples[0].pts - pesTimeScale * timeOffset;
      }
    }
    if (videoTrack.sps && videoTrack.pps && videoSamples.length) {
      videoTrack.timescale = this.MP4_TIMESCALE;
      tracks.video = {
        container: 'video/mp4',
        codec: videoTrack.codec,
        initSegment: MP4.initSegment([videoTrack]),
        metadata: {
          width: videoTrack.width,
          height: videoTrack.height
        }
      };
      if (computePTSDTS) {
        initPTS = Math.min(initPTS, videoSamples[0].pts - pesTimeScale * timeOffset);
        initDTS = Math.min(initDTS, videoSamples[0].dts - pesTimeScale * timeOffset);
      }
    }
    if (Object.keys(tracks).length) {
      observer.trigger(Event.FRAG_PARSING_INIT_SEGMENT, data);
      this.ISGenerated = true;
      if (computePTSDTS) {
        this._initPTS = initPTS;
        this._initDTS = initDTS;
      }
    } else {
      observer.trigger(Event.ERROR, { type: ErrorTypes.MEDIA_ERROR, id: this.id, details: ErrorDetails.FRAG_PARSING_ERROR, fatal: false, reason: 'no audio/video samples found' });
    }
  }

  remuxVideo(track, timeOffset, contiguous, audioTrackLength) {
    var offset = 8,
      pesTimeScale = this.PES_TIMESCALE,
      pes2mp4ScaleFactor = this.PES2MP4SCALEFACTOR,
      mp4SampleDuration,
      mdat, moof,
      firstPTS, firstDTS, nextDTS,
      lastPTS, lastDTS,
      inputSamples = track.samples,
      outputSamples = [];
    // PTS is coded on 33 bits, and can loop from -2^32 to 2^32.
    // PTSNormalize will make PTS/DTS values monotonic; we use the last known DTS value as reference value.
    let nextAvcDts;
    if (contiguous) {
      // if parsed fragment is contiguous with last one, let's use last DTS value as reference
      nextAvcDts = this.nextAvcDts;
    } else {
      // if not contiguous, let's use target timeOffset
      nextAvcDts = timeOffset * pesTimeScale;
    }
    // compute first DTS and last DTS, normalize them against reference value
    let sample = inputSamples[0];
    firstDTS = Math.max(this._PTSNormalize(sample.dts, nextAvcDts) - this._initDTS, 0);
    firstPTS = Math.max(this._PTSNormalize(sample.pts, nextAvcDts) - this._initDTS, 0);
    // check timestamp continuity across consecutive fragments (this is to remove inter-fragment gap/hole)
    let delta = Math.round((firstDTS - nextAvcDts) / 90);
    // if fragments are contiguous, detect hole/overlapping between fragments
    if (contiguous) {
      if (delta) {
        if (delta > 1) {
          logger.log(`AVC:${delta} ms hole between fragments detected, filling it`);
        } else if (delta < -1) {
          logger.log(`AVC:${(-delta)} ms overlapping between fragments detected`);
        }
        // remove hole/gap : set DTS to next expected DTS
        firstDTS = nextAvcDts;
        inputSamples[0].dts = firstDTS + this._initDTS;
        // offset PTS as well, ensure that PTS is smaller or equal than new DTS
        firstPTS = Math.max(firstPTS - delta, nextAvcDts);
        inputSamples[0].pts = firstPTS + this._initDTS;
        logger.log(`Video/PTS/DTS adjusted: ${firstPTS}/${firstDTS},delta:${delta}`);
      }
    }
    nextDTS = firstDTS;
    // compute lastPTS/lastDTS
    sample = inputSamples[inputSamples.length - 1];
    lastDTS = Math.max(this._PTSNormalize(sample.dts, nextAvcDts) - this._initDTS, 0);
    lastPTS = Math.max(this._PTSNormalize(sample.pts, nextAvcDts) - this._initDTS, 0);
    lastPTS = Math.max(lastPTS, lastDTS);
    let vendor = navigator.vendor,
      userAgent = navigator.userAgent,
      isSafari = vendor && vendor.indexOf('Apple') > -1 && userAgent && !userAgent.match('CriOS');
    // on Safari let's signal the same sample duration for all samples
    // sample duration (as expected by trun MP4 boxes), should be the delta between sample DTS
    // set this constant duration as being the avg delta between consecutive DTS.
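    // Worked example (illustrative): for a 25 fps fragment, consecutive DTS
    // values differ by 3600 PES ticks, so the averaged constant duration below
    // comes out to 3600 / 4 = 900 ticks at the 22500 Hz MP4 timescale, i.e.
    // 40 ms per sample.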
    if (isSafari) {
      mp4SampleDuration = Math.round((lastDTS - firstDTS) / (pes2mp4ScaleFactor * (inputSamples.length - 1)));
    }
    // normalize all PTS/DTS now ...
    for (let i = 0; i < inputSamples.length; i++) {
      let sample = inputSamples[i];
      if (isSafari) {
        // sample DTS is computed using a constant decoding offset (mp4SampleDuration) between samples
        sample.dts = firstDTS + i * pes2mp4ScaleFactor * mp4SampleDuration;
      } else {
        // ensure sample monotonic DTS
        sample.dts = Math.max(this._PTSNormalize(sample.dts, nextAvcDts) - this._initDTS, firstDTS);
        // ensure dts is a multiple of scale factor to avoid rounding issues
        sample.dts = Math.round(sample.dts / pes2mp4ScaleFactor) * pes2mp4ScaleFactor;
      }
      // we normalize PTS against nextAvcDts, we also subtract initDTS (some streams don't start @ PTS 0)
      // and we ensure that computed value is greater or equal than sample DTS
      sample.pts = Math.max(this._PTSNormalize(sample.pts, nextAvcDts) - this._initDTS, sample.dts);
      // ensure pts is a multiple of scale factor to avoid rounding issues
      sample.pts = Math.round(sample.pts / pes2mp4ScaleFactor) * pes2mp4ScaleFactor;
    }
    /* concatenate the video data and construct the mdat in place
       (need 8 more bytes to fill length and mdat type) */
    mdat = new Uint8Array(track.len + (4 * track.nbNalu) + 8);
    let view = new DataView(mdat.buffer);
    view.setUint32(0, mdat.byteLength);
    mdat.set(MP4.types.mdat, 4);
    for (let i = 0; i < inputSamples.length; i++) {
      let avcSample = inputSamples[i],
        mp4SampleLength = 0,
        compositionTimeOffset;
      // convert NALU bitstream to MP4 format (prepend NALU with size field)
      while (avcSample.units.units.length) {
        let unit = avcSample.units.units.shift();
        view.setUint32(offset, unit.data.byteLength);
        offset += 4;
        mdat.set(unit.data, offset);
        offset += unit.data.byteLength;
        mp4SampleLength += 4 + unit.data.byteLength;
      }
      if (!isSafari) {
        // expected sample duration is the Decoding Timestamp diff of consecutive samples
        if (i < inputSamples.length - 1) {
          mp4SampleDuration = inputSamples[i + 1].dts - avcSample.dts;
        } else {
          let config = this.config,
            lastFrameDuration = avcSample.dts - inputSamples[i > 0 ? i - 1 : i].dts;
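          // Illustrative numbers, assuming hls.js-style defaults
          // (maxBufferHole = 0.5 s, maxSeekHole = 2 s): gapTolerance =
          // floor(0.5 * 90000) = 45000 PES ticks, so the last frame is only
          // stretched when the next segment is more than 500 ms away.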
          if (config.stretchShortVideoTrack) {
            // In some cases, a segment's audio track duration may exceed the video track duration.
            // Since we've already remuxed audio, and we know how long the audio track is, we look to
            // see if the delta to the next segment is longer than the minimum of maxBufferHole and
            // maxSeekHole. If so, playback would potentially get stuck, so we artificially inflate
            // the duration of the last frame to minimize any potential gap between segments.
            let maxBufferHole = config.maxBufferHole,
              maxSeekHole = config.maxSeekHole,
              gapTolerance = Math.floor(Math.min(maxBufferHole, maxSeekHole) * pesTimeScale),
              deltaToFrameEnd = (audioTrackLength ? firstPTS + audioTrackLength * pesTimeScale : this.nextAacPts) - avcSample.pts;
            if (deltaToFrameEnd > gapTolerance) {
              // We subtract lastFrameDuration from deltaToFrameEnd to try to prevent any video
              // frame overlap. maxBufferHole/maxSeekHole should be >> lastFrameDuration anyway.
              mp4SampleDuration = deltaToFrameEnd - lastFrameDuration;
              if (mp4SampleDuration < 0) {
                mp4SampleDuration = lastFrameDuration;
              }
              logger.log(`It is approximately ${deltaToFrameEnd / 90} ms to the next segment; using duration ${mp4SampleDuration / 90} ms for the last video frame.`);
            } else {
              mp4SampleDuration = lastFrameDuration;
            }
          } else {
            mp4SampleDuration = lastFrameDuration;
          }
        }
        mp4SampleDuration /= pes2mp4ScaleFactor;
        compositionTimeOffset = Math.round((avcSample.pts - avcSample.dts) / pes2mp4ScaleFactor);
      } else {
        compositionTimeOffset = Math.max(0, mp4SampleDuration * Math.round((avcSample.pts - avcSample.dts) / (pes2mp4ScaleFactor * mp4SampleDuration)));
      }
      outputSamples.push({
        size: mp4SampleLength,
        // constant duration
        duration: mp4SampleDuration,
        cts: compositionTimeOffset,
        flags: {
          isLeading: 0,
          isDependedOn: 0,
          hasRedundancy: 0,
          degradPrio: 0,
          dependsOn: avcSample.key ? 2 : 1,
          isNonSync: avcSample.key ? 0 : 1
        }
      });
    }
    // next AVC sample DTS should be equal to last sample DTS + last sample duration (in PES timescale)
    this.nextAvcDts = lastDTS + mp4SampleDuration * pes2mp4ScaleFactor;
    let dropped = track.dropped;
    track.len = 0;
    track.nbNalu = 0;
    track.dropped = 0;
    if (outputSamples.length && navigator.userAgent.toLowerCase().indexOf('chrome') > -1) {
      let flags = outputSamples[0].flags;
      // chrome workaround, mark first sample as being a Random Access Point to avoid sourcebuffer append issue
      // https://code.google.com/p/chromium/issues/detail?id=229412
      flags.dependsOn = 2;
      flags.isNonSync = 0;
    }
    track.samples = outputSamples;
    moof = MP4.moof(track.sequenceNumber++, firstDTS / pes2mp4ScaleFactor, track);
    track.samples = [];
    let data = {
      id: this.id,
      level: this.level,
      sn: this.sn,
      data1: moof,
      data2: mdat,
      startPTS: firstPTS / pesTimeScale,
      endPTS: (lastPTS + pes2mp4ScaleFactor * mp4SampleDuration) / pesTimeScale,
      startDTS: firstPTS / pesTimeScale,
      endDTS: (lastPTS + pes2mp4ScaleFactor * mp4SampleDuration) / pesTimeScale,
      // startDTS: firstDTS / pesTimeScale,
      // endDTS: this.nextAvcDts / pesTimeScale,
      type: 'video',
      nb: outputSamples.length,
      dropped: dropped
    };
    this.observer.trigger(Event.FRAG_PARSING_DATA, data);
    return data;
  }

  remuxAudio(track, timeOffset, contiguous) {
    let pesTimeScale = this.PES_TIMESCALE,
      mp4timeScale = track.timescale,
      pes2mp4ScaleFactor = pesTimeScale / mp4timeScale,
      expectedSampleDuration = track.timescale * 1024 / track.audiosamplerate;
    var view,
      offset = 8,
      aacSample, mp4Sample,
      unit,
      mdat, moof,
      firstPTS, firstDTS, lastDTS,
      pts, dts, ptsnorm, dtsnorm,
      samples = [],
      samples0 = [];
    track.samples.sort(function (a, b) {
      return (a.pts - b.pts);
    });
    samples0 = track.samples;
    let nextAacPts = (contiguous ? this.nextAacPts : timeOffset * pesTimeScale);
    // If the audio track is missing samples, the frames seem to get "left-shifted" within the
    // resulting mp4 segment, causing sync issues and leaving gaps at the end of the audio segment.
    // In an effort to prevent this from happening, we inject frames here where there are gaps.
    // When possible, we inject a silent frame; when that's not possible, we duplicate the last
    // frame.
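    // Worked example (illustrative): at 44100 Hz one AAC frame carries 1024 PCM
    // samples, so pesFrameDuration = 90000 * 1024 / 44100, which is about 2090
    // PES ticks (~23.2 ms); a delta above roughly half that (~1045 ticks)
    // triggers the silent-frame injection below.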
    let firstPtsNorm = this._PTSNormalize(samples0[0].pts - this._initPTS, nextAacPts),
      pesFrameDuration = expectedSampleDuration * pes2mp4ScaleFactor;
    var nextPtsNorm = firstPtsNorm + pesFrameDuration;
    for (var i = 1; i < samples0.length;) {
      // First, let's see how far off this frame is from where we expect it to be
      var sample = samples0[i],
        ptsNorm = this._PTSNormalize(sample.pts - this._initPTS, nextAacPts),
        delta = ptsNorm - nextPtsNorm;
      // If we're overlapping by more than half a duration, drop this sample
      if (delta < (-0.5 * pesFrameDuration)) {
        logger.log(`Dropping frame due to ${Math.abs(delta / 90)} ms overlap.`);
        samples0.splice(i, 1);
        track.len -= sample.unit.length;
        // Don't touch nextPtsNorm or i
      }
      // Otherwise, if we're more than half a frame away from where we should be, insert missing frames
      else if (delta > (0.5 * pesFrameDuration)) {
        var missing = Math.round(delta / pesFrameDuration);
        logger.log(`Injecting ${missing} frame${missing > 1 ? 's' : ''} of missing audio due to ${Math.round(delta / 90)} ms gap.`);
        for (var j = 0; j < missing; j++) {
          var newStamp = samples0[i - 1].pts + pesFrameDuration,
            fillFrame = AAC.getSilentFrame(track.channelCount);
          if (!fillFrame) {
            logger.log('Unable to get silent frame for given audio codec; duplicating last frame instead.');
            fillFrame = sample.unit.slice(0);
          }
          samples0.splice(i, 0, { unit: fillFrame, pts: newStamp, dts: newStamp });
          track.len += fillFrame.length;
          i += 1;
        }
        // Adjust sample to next expected pts
        nextPtsNorm += (missing + 1) * pesFrameDuration;
        sample.pts = samples0[i - 1].pts + pesFrameDuration;
        i += 1;
      }
      // Otherwise, we're within half a frame duration, so just adjust pts
      else {
        if (Math.abs(delta) > (0.1 * pesFrameDuration)) {
          logger.log(`Invalid frame delta ${ptsNorm - nextPtsNorm + pesFrameDuration} at PTS ${Math.round(ptsNorm / 90)} (should be ${pesFrameDuration}).`);
        }
        nextPtsNorm += pesFrameDuration;
        sample.pts = samples0[i - 1].pts + pesFrameDuration;
        i += 1;
      }
    }
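    // Second pass (the loop below): now that the sample list is gap-free, each
    // AAC frame is copied into the mdat and described by one trun sample entry;
    // sample durations are derived from consecutive normalized DTS values.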
    while (samples0.length) {
      aacSample = samples0.shift();
      unit = aacSample.unit;
      pts = aacSample.pts - this._initDTS;
      dts = aacSample.dts - this._initDTS;
      //logger.log(`Audio/PTS:${Math.round(pts/90)}`);
      // if not first sample
      if (lastDTS !== undefined) {
        ptsnorm = this._PTSNormalize(pts, lastDTS);
        dtsnorm = this._PTSNormalize(dts, lastDTS);
        mp4Sample.duration = (dtsnorm - lastDTS) / pes2mp4ScaleFactor;
      } else {
        ptsnorm = this._PTSNormalize(pts, nextAacPts);
        dtsnorm = this._PTSNormalize(dts, nextAacPts);
        let delta = Math.round(1000 * (ptsnorm - nextAacPts) / pesTimeScale);
        // if fragments are contiguous, detect hole/overlapping between fragments
        if (contiguous) {
          // log delta
          if (delta) {
            if (delta > 0) {
              logger.log(`${delta} ms hole between AAC samples detected, filling it`);
              // if we have frame overlap, overlapping for more than half a frame duration
            } else if (delta < -12) {
              // drop overlapping audio frames... browser will deal with it
              logger.log(`${(-delta)} ms overlapping between AAC samples detected, drop frame`);
              track.len -= unit.byteLength;
              continue;
            }
            // set PTS/DTS to expected PTS/DTS
            ptsnorm = dtsnorm = nextAacPts;
          }
        }
        // remember first PTS of our aacSamples, ensure value is positive
        firstPTS = Math.max(0, ptsnorm);
        firstDTS = Math.max(0, dtsnorm);
        if (track.len > 0) {
          /* concatenate the audio data and construct the mdat in place
             (need 8 more bytes to fill length and mdat type) */
          mdat = new Uint8Array(track.len + 8);
          view = new DataView(mdat.buffer);
          view.setUint32(0, mdat.byteLength);
          mdat.set(MP4.types.mdat, 4);
        } else {
          // no audio samples
          return;
        }
      }
      mdat.set(unit, offset);
      offset += unit.byteLength;
      //console.log('PTS/DTS/initDTS/normPTS/normDTS/relative PTS : ${aacSample.pts}/${aacSample.dts}/${this._initDTS}/${ptsnorm}/${dtsnorm}/${(aacSample.pts/4294967296).toFixed(3)}');
      mp4Sample = {
        size: unit.byteLength,
        cts: 0,
        duration: 0,
        flags: {
          isLeading: 0,
          isDependedOn: 0,
          hasRedundancy: 0,
          degradPrio: 0,
          dependsOn: 1
        }
      };
      samples.push(mp4Sample);
      lastDTS = dtsnorm;
    }
    var lastSampleDuration = 0;
    var nbSamples = samples.length;
    // set last sample duration as being identical to previous sample
    if (nbSamples >= 2) {
      lastSampleDuration = samples[nbSamples - 2].duration;
      mp4Sample.duration = lastSampleDuration;
    }
    if (nbSamples) {
      // next aac sample PTS should be equal to last sample PTS + duration
      this.nextAacPts = ptsnorm + pes2mp4ScaleFactor * lastSampleDuration;
      //logger.log('Audio/PTS/PTSend:' + aacSample.pts.toFixed(0) + '/' + this.nextAacDts.toFixed(0));
      track.len = 0;
      track.samples = samples;
      moof = MP4.moof(track.sequenceNumber++, firstDTS / pes2mp4ScaleFactor, track);
      track.samples = [];
      let audioData = {
        id: this.id,
        level: this.level,
        sn: this.sn,
        data1: moof,
        data2: mdat,
        startPTS: firstPTS / pesTimeScale,
        endPTS: this.nextAacPts / pesTimeScale,
        startDTS: firstDTS / pesTimeScale,
        endDTS: (dtsnorm + pes2mp4ScaleFactor * lastSampleDuration) / pesTimeScale,
        type: 'audio',
        nb: nbSamples
      };
      this.observer.trigger(Event.FRAG_PARSING_DATA, audioData);
      return audioData;
    }
    return null;
  }
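  // Worked example for remuxEmptyAudio() below (illustrative): for a 6 s video
  // segment and a 44100 Hz audio track, pes2mp4ScaleFactor = 90000 / 44100, so
  // frameDuration is about 2089.8 PES ticks and nbSamples = ceil(540000 / 2089.8)
  // = 259 silent AAC frames are injected to cover the segment.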
  remuxEmptyAudio(track, timeOffset, contiguous, videoData) {
    let pesTimeScale = this.PES_TIMESCALE,
      mp4timeScale = track.timescale ? track.timescale : track.audiosamplerate,
      pes2mp4ScaleFactor = pesTimeScale / mp4timeScale,
      // sync with video's timestamp
      startDTS = videoData.startDTS * pesTimeScale + this._initDTS,
      endDTS = videoData.endDTS * pesTimeScale + this._initDTS,
      // one sample's duration value
      sampleDuration = 1024,
      frameDuration = pes2mp4ScaleFactor * sampleDuration,
      // samples count of this segment's duration
      nbSamples = Math.ceil((endDTS - startDTS) / frameDuration),
      // silent frame
      silentFrame = AAC.getSilentFrame(track.channelCount);
    // Can't remux if we can't generate a silent frame...
    if (!silentFrame) {
      logger.trace('Unable to remuxEmptyAudio since we were unable to get a silent frame for given audio codec!');
      return;
    }
    let samples = [];
    for (var i = 0; i < nbSamples; i++) {
      var stamp = startDTS + i * frameDuration;
      samples.push({ unit: silentFrame.slice(0), pts: stamp, dts: stamp });
      track.len += silentFrame.length;
    }
    track.samples = samples;
    this.remuxAudio(track, timeOffset, contiguous);
  }

  remuxID3(track, timeOffset) {
    var length = track.samples.length,
      sample;
    // consume samples
    if (length) {
      for (var index = 0; index < length; index++) {
        sample = track.samples[index];
        // setting id3 pts, dts to relative time
        // using this._initPTS and this._initDTS to calculate relative time
        sample.pts = ((sample.pts - this._initPTS) / this.PES_TIMESCALE);
        sample.dts = ((sample.dts - this._initDTS) / this.PES_TIMESCALE);
      }
      this.observer.trigger(Event.FRAG_PARSING_METADATA, {
        id: this.id,
        level: this.level,
        sn: this.sn,
        samples: track.samples
      });
    }
    track.samples = [];
  }

  remuxText(track, timeOffset) {
    track.samples.sort(function (a, b) {
      return (a.pts - b.pts);
    });
    var length = track.samples.length,
      sample;
    // consume samples
    if (length) {
      for (var index = 0; index < length; index++) {
        sample = track.samples[index];
        // setting text pts, dts to relative time
        // using this._initPTS and this._initDTS to calculate relative time
        sample.pts = ((sample.pts - this._initPTS) / this.PES_TIMESCALE);
      }
      this.observer.trigger(Event.FRAG_PARSING_USERDATA, {
        id: this.id,
        level: this.level,
        sn: this.sn,
        samples: track.samples
      });
    }
    track.samples = [];
  }

  _PTSNormalize(value, reference) {
    var offset;
    if (reference === undefined) {
      return value;
    }
    if (reference < value) {
      // - 2^33
      offset = -8589934592;
    } else {
      // + 2^33
      offset = 8589934592;
    }
    /* PTS is 33 bit (from 0 to 2^33 - 1); if the diff between value and
       reference is bigger than half of the amplitude (2^32), it means that
       PTS looping occurred: fill the gap */
    while (Math.abs(value - reference) > 4294967296) {
      value += offset;
    }
    return value;
  }
}

export default MP4Remuxer;
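// Minimal usage sketch (illustrative, not part of this module): the observer is
// an hls.js-style event bus exposing trigger()/on(); `videoTrack`, `level`, `sn`,
// `timeOffset` and `contiguous` are hypothetical values normally produced by the
// TS demuxer.
//
//   import { EventEmitter } from 'events';
//   const observer = new EventEmitter();
//   observer.trigger = observer.emit; // alias so the remuxer can call trigger()
//   observer.on(Event.FRAG_PARSING_DATA, (data) => {
//     // data.data1 (moof) and data.data2 (mdat) would be appended to a SourceBuffer
//   });
//   const remuxer = new MP4Remuxer(observer, 'main', { stretchShortVideoTrack: false });
//   remuxer.pushVideo(level, sn, videoTrack, timeOffset, contiguous);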