/* eslint-disable */
/**
 * fMP4 remuxer
 */
import AAC from '../helper/aac';
import Event from '../events';
import { logger } from '../utils/logger';
import MP4 from '../remux/mp4-generator';
import { ErrorTypes, ErrorDetails } from '../errors';
import '../utils/polyfill';

class MP4Remuxer {
  constructor(observer, id, config) {
    this.observer = observer;
    this.id = id;
    this.config = config;
    this.ISGenerated = false;
    this.PES2MP4SCALEFACTOR = 4;
    this.PES_TIMESCALE = 90000;
    this.MP4_TIMESCALE = this.PES_TIMESCALE / this.PES2MP4SCALEFACTOR;
    this.nextAvcDts = 90300;
    this.H264_TIMEBASE = 3600;
  }

  get passthrough() {
    return false;
  }

  destroy() {
  }

  insertDiscontinuity() {
    this._initPTS = this._initDTS = undefined;
  }

  switchLevel() {
    this.ISGenerated = false;
  }

  pushVideo(level, sn, videoTrack, timeOffset, contiguous) {
    this.level = level;
    this.sn = sn;
    // generate Init Segment if needed
    if (!this.ISGenerated) {
      this.generateVideoIS(videoTrack, timeOffset);
    }
    if (this.ISGenerated) {
      // if (videoTrack.samples.length) {
      this.remuxVideo_2(videoTrack, timeOffset, contiguous);
      // }
    }
  }

  remuxVideo_2(track, timeOffset, contiguous, audioTrackLength) {
    var offset = 8,
      mdat, moof,
      inputSamples = track.samples,
      outputSamples = [];
    /* concatenate the video data and construct the mdat in place
       (need 8 more bytes to fill length and mdat type) */
    mdat = new Uint8Array(track.len + (4 * track.nbNalu) + 8);
    let view = new DataView(mdat.buffer);
    view.setUint32(0, mdat.byteLength);
    mdat.set(MP4.types.mdat, 4);
    var sampleDuration = 0;
    let ptsnorm, dtsnorm, lastDTS;
    for (let i = 0; i < inputSamples.length; i++) {
      let avcSample = inputSamples[i],
        mp4SampleLength = 0;
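      // Worked example (illustrative): a 4-byte NALU [0x65, 0x88, 0x80, 0x21]
      // is written into the mdat as [0x00, 0x00, 0x00, 0x04, 0x65, 0x88, 0x80, 0x21];
      // the loop below replaces Annex-B framing with a 32-bit big-endian size
      // field per NALU, which is the AVCC layout that fMP4 expects.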
      // convert NALU bitstream to MP4 format (prepend NALU with size field)
      while (avcSample.units.units.length) {
        let unit = avcSample.units.units.shift();
        view.setUint32(offset, unit.data.byteLength);
        offset += 4;
        mdat.set(unit.data, offset);
        offset += unit.data.byteLength;
        mp4SampleLength += 4 + unit.data.byteLength;
      }
      let pts = avcSample.pts - this._initPTS;
      let dts = avcSample.dts - this._initDTS;
      dts = Math.min(pts, dts);
      if (lastDTS !== undefined) {
        ptsnorm = this._PTSNormalize(pts, lastDTS);
        dtsnorm = this._PTSNormalize(dts, lastDTS);
        sampleDuration = dtsnorm - lastDTS;
        if (sampleDuration <= 0) {
          logger.log(`invalid sample duration at PTS/DTS: ${avcSample.pts}/${avcSample.dts}|dts norm: ${dtsnorm}|lastDTS: ${lastDTS}:${sampleDuration}`);
          sampleDuration = 1;
        }
      } else {
        var nextAvcDts = this.nextAvcDts, delta;
        ptsnorm = this._PTSNormalize(pts, nextAvcDts);
        dtsnorm = this._PTSNormalize(dts, nextAvcDts);
        if (nextAvcDts) {
          // note: delta is in 90 kHz PES ticks here (there is no /90 conversion to ms)
          delta = Math.round(dtsnorm - nextAvcDts);
          if (/*contiguous ||*/ Math.abs(delta) < 600) {
            if (delta) {
              if (delta > 1) {
                logger.log(`AVC:${delta} ticks hole between fragments detected, filling it`);
              } else if (delta < -1) {
                logger.log(`AVC:${(-delta)} ticks overlapping between fragments detected`);
              }
              dtsnorm = nextAvcDts;
              ptsnorm = Math.max(ptsnorm - delta, dtsnorm);
              logger.log(`Video/PTS/DTS adjusted: ${ptsnorm}/${dtsnorm},delta:${delta}`);
            }
          }
        }
        this.firstPTS = Math.max(0, ptsnorm);
        this.firstDTS = Math.max(0, dtsnorm);
        sampleDuration = 0.03;
      }
      outputSamples.push({
        size: mp4SampleLength,
        duration: this.H264_TIMEBASE,
        cts: 0,
        flags: {
          isLeading: 0,
          isDependedOn: 0,
          hasRedundancy: 0,
          degradPrio: 0,
          dependsOn: avcSample.key ? 2 : 1,
          isNonSync: avcSample.key ? 0 : 1
        }
      });
      lastDTS = dtsnorm;
    }
    var lastSampleDuration = 0;
    if (outputSamples.length >= 2) {
      lastSampleDuration = outputSamples[outputSamples.length - 2].duration;
      outputSamples[0].duration = lastSampleDuration;
    }
    this.nextAvcDts = dtsnorm + lastSampleDuration;
    let dropped = track.dropped;
    track.len = 0;
    track.nbNalu = 0;
    track.dropped = 0;
    if (outputSamples.length && navigator.userAgent.toLowerCase().indexOf('chrome') > -1) {
      let flags = outputSamples[0].flags;
      flags.dependsOn = 2;
      flags.isNonSync = 0;
    }
    track.samples = outputSamples;
    moof = MP4.moof(track.sequenceNumber++, dtsnorm, track);
    track.samples = [];
    let data = {
      id: this.id,
      level: this.level,
      sn: this.sn,
      data1: moof,
      data2: mdat,
      startPTS: ptsnorm,
      endPTS: ptsnorm,
      startDTS: dtsnorm,
      endDTS: dtsnorm,
      type: 'video',
      nb: outputSamples.length,
      dropped: dropped
    };
    this.observer.trigger(Event.FRAG_PARSING_DATA, data);
    return data;
  }

  generateVideoIS(videoTrack, timeOffset) {
    var observer = this.observer,
      videoSamples = videoTrack.samples,
      tracks = {},
      data = { id: this.id, level: this.level, sn: this.sn, tracks: tracks, unique: false },
      computePTSDTS = (this._initPTS === undefined),
      initPTS, initDTS;
    if (computePTSDTS) {
      initPTS = initDTS = Infinity;
    }
    if (videoTrack.sps && videoTrack.pps && videoSamples.length) {
      videoTrack.timescale = 90000; //this.MP4_TIMESCALE;
      tracks.video = {
        container: 'video/mp4',
        codec: videoTrack.codec,
        initSegment: MP4.initSegment([videoTrack]),
        metadata: {
          width: videoTrack.width,
          height: videoTrack.height
        }
      };
      if (computePTSDTS) {
        initPTS = Math.min(initPTS, videoSamples[0].pts - this.H264_TIMEBASE);
        initDTS = Math.min(initDTS, videoSamples[0].dts - this.H264_TIMEBASE);
      }
    }
    if (Object.keys(tracks).length) {
      observer.trigger(Event.FRAG_PARSING_INIT_SEGMENT, data);
      this.ISGenerated = true;
      if (computePTSDTS) {
        this._initPTS = initPTS;
        this._initDTS = initDTS;
      }
    } else {
      logger.error('generateVideoIS ERROR==> ' + ErrorTypes.MEDIA_ERROR);
    }
  }
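  // Note on timescales (illustrative arithmetic): PES timestamps tick at 90 kHz.
  // With PES2MP4SCALEFACTOR = 4, MP4_TIMESCALE = 22500 and remuxVideo() divides
  // PES values by 4 before writing them into the moof. generateVideoIS() above
  // instead keeps the track timescale at 90000 so that remuxVideo_2() can write
  // PES values through unscaled, using H264_TIMEBASE = 3600 ticks (40 ms, i.e.
  // an assumed 25 fps) as the constant per-sample duration.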
  remux(level, sn, audioTrack, videoTrack, id3Track, textTrack, timeOffset, contiguous) {
    this.level = level;
    this.sn = sn;
    // generate Init Segment if needed
    if (!this.ISGenerated) {
      this.generateIS(audioTrack, videoTrack, timeOffset);
    }
    if (this.ISGenerated) {
      // Purposefully remuxing audio before video, so that remuxVideo can use nextAacPts, which is
      // calculated in remuxAudio.
      //logger.log('nb AAC samples:' + audioTrack.samples.length);
      if (audioTrack.samples.length) {
        let audioData = this.remuxAudio(audioTrack, timeOffset, contiguous);
        //logger.log('nb AVC samples:' + videoTrack.samples.length);
        if (videoTrack.samples.length) {
          let audioTrackLength;
          if (audioData) {
            audioTrackLength = audioData.endPTS - audioData.startPTS;
          }
          this.remuxVideo(videoTrack, timeOffset, contiguous, audioTrackLength);
        }
      } else {
        let videoData;
        //logger.log('nb AVC samples:' + videoTrack.samples.length);
        if (videoTrack.samples.length) {
          videoData = this.remuxVideo(videoTrack, timeOffset, contiguous);
        }
        if (videoData && audioTrack.codec) {
          this.remuxEmptyAudio(audioTrack, timeOffset, contiguous, videoData);
        }
      }
    }
    //logger.log('nb ID3 samples:' + id3Track.samples.length);
    if (id3Track.samples.length) {
      this.remuxID3(id3Track, timeOffset);
    }
    //logger.log('nb text samples:' + textTrack.samples.length);
    if (textTrack.samples.length) {
      this.remuxText(textTrack, timeOffset);
    }
    // notify end of parsing
    this.observer.trigger(Event.FRAG_PARSED, { id: this.id, level: this.level, sn: this.sn });
  }

  generateIS(audioTrack, videoTrack, timeOffset) {
    var observer = this.observer,
      audioSamples = audioTrack.samples,
      videoSamples = videoTrack.samples,
      pesTimeScale = this.PES_TIMESCALE,
      tracks = {},
      data = { id: this.id, level: this.level, sn: this.sn, tracks: tracks, unique: false },
      computePTSDTS = (this._initPTS === undefined),
      initPTS, initDTS;
    if (computePTSDTS) {
      initPTS = initDTS = Infinity;
    }
    if (audioTrack.config && audioSamples.length) {
      audioTrack.timescale = audioTrack.audiosamplerate;
      // MP4 duration (track duration in seconds multiplied by timescale) is coded on 32 bits.
      // We know that each AAC frame contains 1024 audio samples.
      // In order to avoid overflowing the 32 bit counter for large durations, we use a smaller timescale (timescale/gcd);
      // we just need to ensure that the AAC sample duration will still be an integer (it will be 1024/gcd).
      if (audioTrack.timescale * audioTrack.duration > Math.pow(2, 32)) {
        let greatestCommonDivisor = function (a, b) {
          if (!b) {
            return a;
          }
          return greatestCommonDivisor(b, a % b);
        };
        audioTrack.timescale = audioTrack.audiosamplerate / greatestCommonDivisor(audioTrack.audiosamplerate, 1024);
      }
      logger.log('audio mp4 timescale :' + audioTrack.timescale);
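      // Worked example (illustrative): at 44100 Hz, timescale * duration would
      // overflow 2^32 after roughly 27 hours. gcd(44100, 1024) = 4, so the
      // timescale drops to 11025 and each AAC frame then lasts 1024 / 4 = 256
      // ticks, still an integer, as required above.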
      tracks.audio = {
        container: 'audio/mp4',
        codec: audioTrack.codec,
        initSegment: MP4.initSegment([audioTrack]),
        metadata: {
          channelCount: audioTrack.channelCount
        }
      };
      if (computePTSDTS) {
        // remember first PTS of this demuxing context. for audio, PTS + DTS ...
        initPTS = initDTS = audioSamples[0].pts - pesTimeScale * timeOffset;
      }
    }
    if (videoTrack.sps && videoTrack.pps && videoSamples.length) {
      videoTrack.timescale = this.MP4_TIMESCALE;
      tracks.video = {
        container: 'video/mp4',
        codec: videoTrack.codec,
        initSegment: MP4.initSegment([videoTrack]),
        metadata: {
          width: videoTrack.width,
          height: videoTrack.height
        }
      };
      if (computePTSDTS) {
        initPTS = Math.min(initPTS, videoSamples[0].pts - pesTimeScale * timeOffset);
        initDTS = Math.min(initDTS, videoSamples[0].dts - pesTimeScale * timeOffset);
      }
    }
    if (Object.keys(tracks).length) {
      observer.trigger(Event.FRAG_PARSING_INIT_SEGMENT, data);
      this.ISGenerated = true;
      if (computePTSDTS) {
        this._initPTS = initPTS;
        this._initDTS = initDTS;
      }
    } else {
      observer.trigger(Event.ERROR, { type: ErrorTypes.MEDIA_ERROR, id: this.id, details: ErrorDetails.FRAG_PARSING_ERROR, fatal: false, reason: 'no audio/video samples found' });
    }
  }

  remuxVideo(track, timeOffset, contiguous, audioTrackLength) {
    var offset = 8,
      pesTimeScale = this.PES_TIMESCALE,
      pes2mp4ScaleFactor = this.PES2MP4SCALEFACTOR,
      mp4SampleDuration,
      mdat, moof,
      firstPTS, firstDTS, nextDTS,
      lastPTS, lastDTS,
      inputSamples = track.samples,
      outputSamples = [];
    // PTS is coded on 33 bits, and can loop from -2^32 to 2^32.
    // PTSNormalize will make PTS/DTS values monotonic; we use the last known DTS value as reference value.
    let nextAvcDts;
    if (contiguous) {
      // if parsed fragment is contiguous with last one, let's use last DTS value as reference
      nextAvcDts = this.nextAvcDts;
    } else {
      // if not contiguous, let's use target timeOffset
      nextAvcDts = timeOffset * pesTimeScale;
    }
    // compute first DTS and last DTS, normalize them against reference value
    let sample = inputSamples[0];
    firstDTS = Math.max(this._PTSNormalize(sample.dts, nextAvcDts) - this._initDTS, 0);
    firstPTS = Math.max(this._PTSNormalize(sample.pts, nextAvcDts) - this._initDTS, 0);
    // check timestamp continuity across consecutive fragments (this is to remove inter-fragment gap/hole)
    let delta = Math.round((firstDTS - nextAvcDts) / 90);
    // if fragments are contiguous, detect hole/overlapping between fragments
    if (contiguous) {
      if (delta) {
        if (delta > 1) {
          logger.log(`AVC:${delta} ms hole between fragments detected, filling it`);
        } else if (delta < -1) {
          logger.log(`AVC:${(-delta)} ms overlapping between fragments detected`);
        }
        // remove hole/gap : set DTS to next expected DTS
        firstDTS = nextAvcDts;
        inputSamples[0].dts = firstDTS + this._initDTS;
        // offset PTS as well, ensure that PTS is smaller or equal than new DTS
        firstPTS = Math.max(firstPTS - delta, nextAvcDts);
        inputSamples[0].pts = firstPTS + this._initDTS;
        logger.log(`Video/PTS/DTS adjusted: ${firstPTS}/${firstDTS},delta:${delta}`);
      }
    }
    nextDTS = firstDTS;
    // compute lastPTS/lastDTS
    sample = inputSamples[inputSamples.length - 1];
    lastDTS = Math.max(this._PTSNormalize(sample.dts, nextAvcDts) - this._initDTS, 0);
    lastPTS = Math.max(this._PTSNormalize(sample.pts, nextAvcDts) - this._initDTS, 0);
    lastPTS = Math.max(lastPTS, lastDTS);
    let vendor = navigator.vendor,
      userAgent = navigator.userAgent,
      isSafari = vendor && vendor.indexOf('Apple') > -1 && userAgent && !userAgent.match('CriOS');
    // on Safari let's signal the same sample duration for all samples
    // sample duration (as expected by trun MP4 boxes), should be the delta between sample DTS
    // set this constant duration as being the avg delta between consecutive DTS.
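    // Worked example (illustrative): for a 25 fps fragment, consecutive DTS
    // values differ by 3600 PES ticks, so the averaged constant duration below
    // comes out to 3600 / 4 = 900 ticks at the 22500 Hz MP4 timescale, i.e.
    // 40 ms per sample.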
    if (isSafari) {
      mp4SampleDuration = Math.round((lastDTS - firstDTS) / (pes2mp4ScaleFactor * (inputSamples.length - 1)));
    }
    // normalize all PTS/DTS now ...
    for (let i = 0; i < inputSamples.length; i++) {
      let sample = inputSamples[i];
      if (isSafari) {
        // sample DTS is computed using a constant decoding offset (mp4SampleDuration) between samples
        sample.dts = firstDTS + i * pes2mp4ScaleFactor * mp4SampleDuration;
      } else {
        // ensure sample monotonic DTS
        sample.dts = Math.max(this._PTSNormalize(sample.dts, nextAvcDts) - this._initDTS, firstDTS);
        // ensure dts is a multiple of scale factor to avoid rounding issues
        sample.dts = Math.round(sample.dts / pes2mp4ScaleFactor) * pes2mp4ScaleFactor;
      }
      // we normalize PTS against nextAvcDts, we also subtract initDTS (some streams don't start @ PTS 0)
      // and we ensure that computed value is greater or equal than sample DTS
      sample.pts = Math.max(this._PTSNormalize(sample.pts, nextAvcDts) - this._initDTS, sample.dts);
      // ensure pts is a multiple of scale factor to avoid rounding issues
      sample.pts = Math.round(sample.pts / pes2mp4ScaleFactor) * pes2mp4ScaleFactor;
    }
    /* concatenate the video data and construct the mdat in place
       (need 8 more bytes to fill length and mdat type) */
    mdat = new Uint8Array(track.len + (4 * track.nbNalu) + 8);
    let view = new DataView(mdat.buffer);
    view.setUint32(0, mdat.byteLength);
    mdat.set(MP4.types.mdat, 4);
    for (let i = 0; i < inputSamples.length; i++) {
      let avcSample = inputSamples[i],
        mp4SampleLength = 0,
        compositionTimeOffset;
      // convert NALU bitstream to MP4 format (prepend NALU with size field)
      while (avcSample.units.units.length) {
        let unit = avcSample.units.units.shift();
        view.setUint32(offset, unit.data.byteLength);
        offset += 4;
        mdat.set(unit.data, offset);
        offset += unit.data.byteLength;
        mp4SampleLength += 4 + unit.data.byteLength;
      }
      if (!isSafari) {
        // expected sample duration is the Decoding Timestamp diff of consecutive samples
        if (i < inputSamples.length - 1) {
          mp4SampleDuration = inputSamples[i + 1].dts - avcSample.dts;
        } else {
          let config = this.config,
            lastFrameDuration = avcSample.dts - inputSamples[i > 0 ? i - 1 : i].dts;
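          // Illustrative numbers, assuming hls.js-style defaults
          // (maxBufferHole = 0.5 s, maxSeekHole = 2 s): gapTolerance =
          // floor(0.5 * 90000) = 45000 PES ticks, so the last frame is only
          // stretched when the next segment is more than 500 ms away.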
          if (config.stretchShortVideoTrack) {
            // In some cases, a segment's audio track duration may exceed the video track duration.
            // Since we've already remuxed audio, and we know how long the audio track is, we look to
            // see if the delta to the next segment is longer than the minimum of maxBufferHole and
            // maxSeekHole. If so, playback would potentially get stuck, so we artificially inflate
            // the duration of the last frame to minimize any potential gap between segments.
            let maxBufferHole = config.maxBufferHole,
              maxSeekHole = config.maxSeekHole,
              gapTolerance = Math.floor(Math.min(maxBufferHole, maxSeekHole) * pesTimeScale),
              deltaToFrameEnd = (audioTrackLength ? firstPTS + audioTrackLength * pesTimeScale : this.nextAacPts) - avcSample.pts;
            if (deltaToFrameEnd > gapTolerance) {
              // We subtract lastFrameDuration from deltaToFrameEnd to try to prevent any video
              // frame overlap. maxBufferHole/maxSeekHole should be >> lastFrameDuration anyway.
              mp4SampleDuration = deltaToFrameEnd - lastFrameDuration;
              if (mp4SampleDuration < 0) {
                mp4SampleDuration = lastFrameDuration;
              }
              logger.log(`It is approximately ${deltaToFrameEnd / 90} ms to the next segment; using duration ${mp4SampleDuration / 90} ms for the last video frame.`);
            } else {
              mp4SampleDuration = lastFrameDuration;
            }
          } else {
            mp4SampleDuration = lastFrameDuration;
          }
        }
        mp4SampleDuration /= pes2mp4ScaleFactor;
        compositionTimeOffset = Math.round((avcSample.pts - avcSample.dts) / pes2mp4ScaleFactor);
      } else {
        compositionTimeOffset = Math.max(0, mp4SampleDuration * Math.round((avcSample.pts - avcSample.dts) / (pes2mp4ScaleFactor * mp4SampleDuration)));
      }
      outputSamples.push({
        size: mp4SampleLength,
        // constant duration
        duration: mp4SampleDuration,
        cts: compositionTimeOffset,
        flags: {
          isLeading: 0,
          isDependedOn: 0,
          hasRedundancy: 0,
          degradPrio: 0,
          dependsOn: avcSample.key ? 2 : 1,
          isNonSync: avcSample.key ? 0 : 1
        }
      });
    }
    // next AVC sample DTS should be equal to last sample DTS + last sample duration (in PES timescale)
    this.nextAvcDts = lastDTS + mp4SampleDuration * pes2mp4ScaleFactor;
    let dropped = track.dropped;
    track.len = 0;
    track.nbNalu = 0;
    track.dropped = 0;
    if (outputSamples.length && navigator.userAgent.toLowerCase().indexOf('chrome') > -1) {
      let flags = outputSamples[0].flags;
      // chrome workaround, mark first sample as being a Random Access Point to avoid sourcebuffer append issue
      // https://code.google.com/p/chromium/issues/detail?id=229412
      flags.dependsOn = 2;
      flags.isNonSync = 0;
    }
    track.samples = outputSamples;
    moof = MP4.moof(track.sequenceNumber++, firstDTS / pes2mp4ScaleFactor, track);
    track.samples = [];
    let data = {
      id: this.id,
      level: this.level,
      sn: this.sn,
      data1: moof,
      data2: mdat,
      startPTS: firstPTS / pesTimeScale,
      endPTS: (lastPTS + pes2mp4ScaleFactor * mp4SampleDuration) / pesTimeScale,
      startDTS: firstPTS / pesTimeScale,
      endDTS: (lastPTS + pes2mp4ScaleFactor * mp4SampleDuration) / pesTimeScale,
      // startDTS: firstDTS / pesTimeScale,
      // endDTS: this.nextAvcDts / pesTimeScale,
      type: 'video',
      nb: outputSamples.length,
      dropped: dropped
    };
    this.observer.trigger(Event.FRAG_PARSING_DATA, data);
    return data;
  }

  remuxAudio(track, timeOffset, contiguous) {
    let pesTimeScale = this.PES_TIMESCALE,
      mp4timeScale = track.timescale,
      pes2mp4ScaleFactor = pesTimeScale / mp4timeScale,
      expectedSampleDuration = track.timescale * 1024 / track.audiosamplerate;
    var view,
      offset = 8,
      aacSample, mp4Sample,
      unit,
      mdat, moof,
      firstPTS, firstDTS, lastDTS,
      pts, dts, ptsnorm, dtsnorm,
      samples = [],
      samples0 = [];
    track.samples.sort(function (a, b) {
      return (a.pts - b.pts);
    });
    samples0 = track.samples;
    let nextAacPts = (contiguous ? this.nextAacPts : timeOffset * pesTimeScale);
    // If the audio track is missing samples, the frames seem to get "left-shifted" within the
    // resulting mp4 segment, causing sync issues and leaving gaps at the end of the audio segment.
    // In an effort to prevent this from happening, we inject frames here where there are gaps.
    // When possible, we inject a silent frame; when that's not possible, we duplicate the last
    // frame.
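    // Worked example (illustrative): at 44100 Hz one AAC frame carries 1024 PCM
    // samples, so pesFrameDuration = 90000 * 1024 / 44100, which is about 2090
    // PES ticks (~23.2 ms); a delta above roughly half that (~1045 ticks)
    // triggers the silent-frame injection below.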
    let firstPtsNorm = this._PTSNormalize(samples0[0].pts - this._initPTS, nextAacPts),
      pesFrameDuration = expectedSampleDuration * pes2mp4ScaleFactor;
    var nextPtsNorm = firstPtsNorm + pesFrameDuration;
    for (var i = 1; i < samples0.length;) {
      // First, let's see how far off this frame is from where we expect it to be
      var sample = samples0[i],
        ptsNorm = this._PTSNormalize(sample.pts - this._initPTS, nextAacPts),
        delta = ptsNorm - nextPtsNorm;
      // If we're overlapping by more than half a duration, drop this sample
      if (delta < (-0.5 * pesFrameDuration)) {
        logger.log(`Dropping frame due to ${Math.abs(delta / 90)} ms overlap.`);
        samples0.splice(i, 1);
        track.len -= sample.unit.length;
        // Don't touch nextPtsNorm or i
      }
      // Otherwise, if we're more than half a frame away from where we should be, insert missing frames
      else if (delta > (0.5 * pesFrameDuration)) {
        var missing = Math.round(delta / pesFrameDuration);
        logger.log(`Injecting ${missing} frame${missing > 1 ? 's' : ''} of missing audio due to ${Math.round(delta / 90)} ms gap.`);
        for (var j = 0; j < missing; j++) {
          var newStamp = samples0[i - 1].pts + pesFrameDuration,
            fillFrame = AAC.getSilentFrame(track.channelCount);
          if (!fillFrame) {
            logger.log('Unable to get silent frame for given audio codec; duplicating last frame instead.');
            fillFrame = sample.unit.slice(0);
          }
          samples0.splice(i, 0, { unit: fillFrame, pts: newStamp, dts: newStamp });
          track.len += fillFrame.length;
          i += 1;
        }
        // Adjust sample to next expected pts
        nextPtsNorm += (missing + 1) * pesFrameDuration;
        sample.pts = samples0[i - 1].pts + pesFrameDuration;
        i += 1;
      }
      // Otherwise, we're within half a frame duration, so just adjust pts
      else {
        if (Math.abs(delta) > (0.1 * pesFrameDuration)) {
          logger.log(`Invalid frame delta ${ptsNorm - nextPtsNorm + pesFrameDuration} at PTS ${Math.round(ptsNorm / 90)} (should be ${pesFrameDuration}).`);
        }
        nextPtsNorm += pesFrameDuration;
        sample.pts = samples0[i - 1].pts + pesFrameDuration;
        i += 1;
      }
    }
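    // Second pass (the loop below): now that the sample list is gap-free, each
    // AAC frame is copied into the mdat and described by one trun sample entry;
    // sample durations are derived from consecutive normalized DTS values.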
    while (samples0.length) {
      aacSample = samples0.shift();
      unit = aacSample.unit;
      pts = aacSample.pts - this._initDTS;
      dts = aacSample.dts - this._initDTS;
      //logger.log(`Audio/PTS:${Math.round(pts/90)}`);
      // if not first sample
      if (lastDTS !== undefined) {
        ptsnorm = this._PTSNormalize(pts, lastDTS);
        dtsnorm = this._PTSNormalize(dts, lastDTS);
        mp4Sample.duration = (dtsnorm - lastDTS) / pes2mp4ScaleFactor;
      } else {
        ptsnorm = this._PTSNormalize(pts, nextAacPts);
        dtsnorm = this._PTSNormalize(dts, nextAacPts);
        let delta = Math.round(1000 * (ptsnorm - nextAacPts) / pesTimeScale);
        // if fragments are contiguous, detect hole/overlapping between fragments
        if (contiguous) {
          // log delta
          if (delta) {
            if (delta > 0) {
              logger.log(`${delta} ms hole between AAC samples detected, filling it`);
              // if we have frame overlap, overlapping for more than half a frame duration
            } else if (delta < -12) {
              // drop overlapping audio frames... browser will deal with it
              logger.log(`${(-delta)} ms overlapping between AAC samples detected, drop frame`);
              track.len -= unit.byteLength;
              continue;
            }
            // set PTS/DTS to expected PTS/DTS
            ptsnorm = dtsnorm = nextAacPts;
          }
        }
        // remember first PTS of our aacSamples, ensure value is positive
        firstPTS = Math.max(0, ptsnorm);
        firstDTS = Math.max(0, dtsnorm);
        if (track.len > 0) {
          /* concatenate the audio data and construct the mdat in place
             (need 8 more bytes to fill length and mdat type) */
          mdat = new Uint8Array(track.len + 8);
          view = new DataView(mdat.buffer);
          view.setUint32(0, mdat.byteLength);
          mdat.set(MP4.types.mdat, 4);
        } else {
          // no audio samples
          return;
        }
      }
      mdat.set(unit, offset);
      offset += unit.byteLength;
      //console.log('PTS/DTS/initDTS/normPTS/normDTS/relative PTS : ${aacSample.pts}/${aacSample.dts}/${this._initDTS}/${ptsnorm}/${dtsnorm}/${(aacSample.pts/4294967296).toFixed(3)}');
      mp4Sample = {
        size: unit.byteLength,
        cts: 0,
        duration: 0,
        flags: {
          isLeading: 0,
          isDependedOn: 0,
          hasRedundancy: 0,
          degradPrio: 0,
          dependsOn: 1
        }
      };
      samples.push(mp4Sample);
      lastDTS = dtsnorm;
    }
    var lastSampleDuration = 0;
    var nbSamples = samples.length;
    // set last sample duration as being identical to previous sample
    if (nbSamples >= 2) {
      lastSampleDuration = samples[nbSamples - 2].duration;
      mp4Sample.duration = lastSampleDuration;
    }
    if (nbSamples) {
      // next aac sample PTS should be equal to last sample PTS + duration
      this.nextAacPts = ptsnorm + pes2mp4ScaleFactor * lastSampleDuration;
      //logger.log('Audio/PTS/PTSend:' + aacSample.pts.toFixed(0) + '/' + this.nextAacDts.toFixed(0));
      track.len = 0;
      track.samples = samples;
      moof = MP4.moof(track.sequenceNumber++, firstDTS / pes2mp4ScaleFactor, track);
      track.samples = [];
      let audioData = {
        id: this.id,
        level: this.level,
        sn: this.sn,
        data1: moof,
        data2: mdat,
        startPTS: firstPTS / pesTimeScale,
        endPTS: this.nextAacPts / pesTimeScale,
        startDTS: firstDTS / pesTimeScale,
        endDTS: (dtsnorm + pes2mp4ScaleFactor * lastSampleDuration) / pesTimeScale,
        type: 'audio',
        nb: nbSamples
      };
      this.observer.trigger(Event.FRAG_PARSING_DATA, audioData);
      return audioData;
    }
    return null;
  }
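  // Worked example for remuxEmptyAudio() below (illustrative): for a 6 s video
  // segment and a 44100 Hz audio track, pes2mp4ScaleFactor = 90000 / 44100, so
  // frameDuration is about 2089.8 PES ticks and nbSamples = ceil(540000 / 2089.8)
  // = 259 silent AAC frames are injected to cover the segment.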
  remuxEmptyAudio(track, timeOffset, contiguous, videoData) {
    let pesTimeScale = this.PES_TIMESCALE,
      mp4timeScale = track.timescale ? track.timescale : track.audiosamplerate,
      pes2mp4ScaleFactor = pesTimeScale / mp4timeScale,
      // sync with video's timestamp
      startDTS = videoData.startDTS * pesTimeScale + this._initDTS,
      endDTS = videoData.endDTS * pesTimeScale + this._initDTS,
      // one sample's duration value
      sampleDuration = 1024,
      frameDuration = pes2mp4ScaleFactor * sampleDuration,
      // samples count of this segment's duration
      nbSamples = Math.ceil((endDTS - startDTS) / frameDuration),
      // silent frame
      silentFrame = AAC.getSilentFrame(track.channelCount);
    // Can't remux if we can't generate a silent frame...
    if (!silentFrame) {
      logger.trace('Unable to remuxEmptyAudio since we were unable to get a silent frame for given audio codec!');
      return;
    }
    let samples = [];
    for (var i = 0; i < nbSamples; i++) {
      var stamp = startDTS + i * frameDuration;
      samples.push({ unit: silentFrame.slice(0), pts: stamp, dts: stamp });
      track.len += silentFrame.length;
    }
    track.samples = samples;
    this.remuxAudio(track, timeOffset, contiguous);
  }

  remuxID3(track, timeOffset) {
    var length = track.samples.length,
      sample;
    // consume samples
    if (length) {
      for (var index = 0; index < length; index++) {
        sample = track.samples[index];
        // setting id3 pts, dts to relative time
        // using this._initPTS and this._initDTS to calculate relative time
        sample.pts = ((sample.pts - this._initPTS) / this.PES_TIMESCALE);
        sample.dts = ((sample.dts - this._initDTS) / this.PES_TIMESCALE);
      }
      this.observer.trigger(Event.FRAG_PARSING_METADATA, {
        id: this.id,
        level: this.level,
        sn: this.sn,
        samples: track.samples
      });
    }
    track.samples = [];
  }

  remuxText(track, timeOffset) {
    track.samples.sort(function (a, b) {
      return (a.pts - b.pts);
    });
    var length = track.samples.length,
      sample;
    // consume samples
    if (length) {
      for (var index = 0; index < length; index++) {
        sample = track.samples[index];
        // setting text pts, dts to relative time
        // using this._initPTS and this._initDTS to calculate relative time
        sample.pts = ((sample.pts - this._initPTS) / this.PES_TIMESCALE);
      }
      this.observer.trigger(Event.FRAG_PARSING_USERDATA, {
        id: this.id,
        level: this.level,
        sn: this.sn,
        samples: track.samples
      });
    }
    track.samples = [];
  }

  _PTSNormalize(value, reference) {
    var offset;
    if (reference === undefined) {
      return value;
    }
    if (reference < value) {
      // - 2^33
      offset = -8589934592;
    } else {
      // + 2^33
      offset = 8589934592;
    }
    /* PTS is 33 bit (from 0 to 2^33 - 1); if the diff between value and
       reference is bigger than half of the amplitude (2^32), it means that
       PTS looping occurred: fill the gap */
    while (Math.abs(value - reference) > 4294967296) {
      value += offset;
    }
    return value;
  }
}

export default MP4Remuxer;
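// Minimal usage sketch (illustrative, not part of this module): the observer is
// an hls.js-style event bus exposing trigger()/on(); `videoTrack`, `level`, `sn`,
// `timeOffset` and `contiguous` are hypothetical values normally produced by the
// TS demuxer.
//
//   import { EventEmitter } from 'events';
//   const observer = new EventEmitter();
//   observer.trigger = observer.emit; // alias so the remuxer can call trigger()
//   observer.on(Event.FRAG_PARSING_DATA, (data) => {
//     // data.data1 (moof) and data.data2 (mdat) would be appended to a SourceBuffer
//   });
//   const remuxer = new MP4Remuxer(observer, 'main', { stretchShortVideoTrack: false });
//   remuxer.pushVideo(level, sn, videoTrack, timeOffset, contiguous);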