001/**
002 * Copyright (c) 2011, The University of Southampton and the individual contributors.
003 * All rights reserved.
004 *
005 * Redistribution and use in source and binary forms, with or without modification,
006 * are permitted provided that the following conditions are met:
007 *
008 *   *  Redistributions of source code must retain the above copyright notice,
009 *      this list of conditions and the following disclaimer.
010 *
011 *   *  Redistributions in binary form must reproduce the above copyright notice,
012 *      this list of conditions and the following disclaimer in the documentation
013 *      and/or other materials provided with the distribution.
014 *
015 *   *  Neither the name of the University of Southampton nor the names of its
016 *      contributors may be used to endorse or promote products derived from this
017 *      software without specific prior written permission.
018 *
019 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
020 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
021 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
022 * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
023 * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
024 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
025 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
026 * ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
027 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
028 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
029 */
030package org.openimaj.vis.audio;
031
032import gnu.trove.list.array.TFloatArrayList;
033
034import java.awt.Dimension;
035import java.util.ArrayList;
036
037import org.openimaj.audio.AudioFormat;
038import org.openimaj.audio.AudioStream;
039import org.openimaj.audio.SampleChunk;
040import org.openimaj.audio.processor.AudioProcessor;
041import org.openimaj.audio.samples.SampleBuffer;
042import org.openimaj.image.MBFImage;
043import org.openimaj.image.colour.RGBColour;
044import org.openimaj.image.renderer.MBFImageRenderer;
045import org.openimaj.image.typography.hershey.HersheyFont;
046import org.openimaj.math.geometry.point.Point2d;
047import org.openimaj.math.geometry.point.Point2dImpl;
048import org.openimaj.math.geometry.shape.Polygon;
049import org.openimaj.vis.DataUnitsTransformer;
050import org.openimaj.vis.VisualisationImpl;
051import org.openimaj.vis.timeline.TimelineObject;
052import org.openimaj.vis.timeline.TimelineObjectAdapter;
053
054/**
055 *      Utilises an audio processor to plot the audio waveform to an image. This class
056 *      is both a {@link VisualisationImpl} and a {@link TimelineObject}. This means that
057 *      it can be used to plot a complete visualisation of the overview of the data
058 *      or it can be used to plot temporal parts of the data into the visualisation window.
059 *      <p>
060 *      An internal class (AudioOverviewGenerator) can be used to generate overviews
061 *      if necessary.
062 *      <p>
063 *      This class also extends {@link TimelineObjectAdapter} which allows an audio
064 *      waveform to be put upon a timeline.
065 *
066 *  @author David Dupplaw (dpd@ecs.soton.ac.uk)
067 *
068 *      @created 9 Jun 2011
069 */
070public class AudioOverviewVisualisation extends VisualisationImpl<AudioStream>
071        implements TimelineObject
072{
073        /** */
074        private static final long serialVersionUID = 1L;
075
076        /**
077         *      Generates an audio overview. This is a lower-resolution version of
078         *      the audio waveform. It takes the maximum value from a set of
079         *      values and stores this as the overview. By default the processor
080         *      takes the maximum value from every 5000 samples.  The method
081         *      {@link #getAudioOverview(int, int)} allows resampling of that
082         *      overview.
083         *
084         *      @author David Dupplaw (dpd@ecs.soton.ac.uk)
085         *  @created 21 Jul 2011
086         *
087         */
088        public class AudioOverviewGenerator extends AudioProcessor
089        {
090        /** Number of bins in the overview */
091        private int nSamplesPerBin = -1;
092
093        /** The maximum in the current bin for each channel */
094        private float[] channelMax = null;
095
096        /** The number of samples so far in the current bin being processed */
097        private int nSamplesInBin = 0;
098
099        /** The overview data */
100        private TFloatArrayList[] audioOverview = null;
101
102        /** The number of channels in the audio data */
103                private int nChannels = 0;
104
105                /** The audio format of the samples we're processing */
106                private AudioFormat af = null;
107
108        /**
109         *      Constructor
110         *
111         *      @param nSamplesPerBin The number of samples per bin
112         *      @param nChannels The number of channels
113         */
114        public AudioOverviewGenerator( final int nSamplesPerBin, final int nChannels )
115                {
116                        this.nSamplesPerBin = nSamplesPerBin;
117                        this.nChannels = nChannels;
118                        this.audioOverview = new TFloatArrayList[nChannels];
119                        this.channelMax = new float[nChannels];
120
121                        for( int i = 0; i < nChannels; i++ )
122                                this.audioOverview[i] = new TFloatArrayList();
123                }
124
125        /**
126         *      {@inheritDoc}
127         *      @see org.openimaj.audio.processor.AudioProcessor#process(org.openimaj.audio.SampleChunk)
128         */
129                @Override
130                public SampleChunk process( final SampleChunk samples )
131                {
132                        // Store the format of the data
133                        if( this.af == null ) this.af = samples.getFormat();
134
135                        // Get the sample data
136                        final SampleBuffer b = samples.getSampleBuffer();
137
138                        // The number of samples (per channel) in this sample chunk
139                        final int nSamples = b.size() / this.af.getNumChannels();
140
141                        // Keep a running total of how many samples we've processed
142                        AudioOverviewVisualisation.this.numberOfProcessedSamples += nSamples;
143
144                        for( int x = 0; x < nSamples; x++ )
145                        {
146                                for( int c = 0; c < this.nChannels; c++ )
147                                {
148                                        // Store the maximum for the current bin
149                                        this.channelMax[c] = Math.max( this.channelMax[c],
150                                                        b.get(x*this.nChannels+c) );
151                                }
152
153                                // If we're still within the bin
154                                if( this.nSamplesInBin < this.nSamplesPerBin )
155                                        this.nSamplesInBin++;
156                                else
157                                {
158                                        // We've overflowed the bin
159                                        for( int c = 0; c < this.nChannels; c++ )
160                                        {
161                                                // Store the current bin
162                                                this.audioOverview[c].add( this.channelMax[c] );
163                                                this.channelMax[c] = Integer.MIN_VALUE;
164                                        }
165
166                                        // Reset for the next bin
167                                        this.nSamplesInBin = 0;
168                                }
169                        }
170
171                        return samples;
172                }
173
174                /**
175                 *      @return Get the overview data.
176                 */
177                public TFloatArrayList[] getAudioOverview()
178                {
179                        return this.audioOverview;
180                }
181
182                /**
183                 *      Refactors the overview to given another overview. If the number
184                 *      of bins specified an overview that's finer than the actual overview
185                 *      the original overview is returned. The output of this function will
186                 *      then only return an array list of nBins or less.
187                 *
188                 *      @param channel The channel to get
189                 *      @param nBins The number of bins in the overview
190                 *      @return A refactors overview
191                 */
192                public TFloatArrayList getAudioOverview( final int channel, final int nBins )
193                {
194                        if( nBins >= this.audioOverview[channel].size() )
195                                return this.audioOverview[channel];
196
197                        final TFloatArrayList ii = new TFloatArrayList();
198                        final double scalar = (double)this.audioOverview[channel].size() / (double)nBins;
199                        for( int xx = 0; xx < nBins; xx++ )
200                        {
201                                final int startBin = (int)(xx * scalar);
202                                final int endBin = (int)((xx+1) * scalar);
203                                float m = Integer.MIN_VALUE;
204                                for( int yy = startBin; yy < endBin; yy++ )
205                                        m = Math.max( m, this.audioOverview[channel].get(yy) );
206                                ii.add( m );
207                        }
208                        return ii;
209                }
210
211                /**
212                 *      Returns a polygon representing the channel overview.
213                 *      @param channel The channel to get the polygon for
214                 *      @param mirror whether to mirror the polygon
215                 *      @param width The width of the overview to return
216                 *      @return A polygon
217                 */
218                public Polygon getChannelPolygon( final int channel, final boolean mirror, final int width )
219                {
220                        final TFloatArrayList overview = this.getAudioOverview( channel, width );
221                        final int len = overview.size();
222                        final double scalar = width / (double)len;
223
224                        final ArrayList<Point2d> l = new ArrayList<Point2d>();
225                        for( int x = 0; x < len; x++ )
226                                l.add( new Point2dImpl( (float)(x * scalar), overview.get(x) ) );
227
228                        if( mirror )
229                        {
230                                for( int x = 1; x <= len; x++ )
231                                        l.add( new Point2dImpl( (float)((len-x)*scalar),
232                                                -overview.get(len-x) ) );
233                        }
234
235                        // Store how long the given overview is in milliseconds
236                        AudioOverviewVisualisation.this.millisecondsInView = (long)(AudioOverviewVisualisation.this.numberOfProcessedSamples /
237                                        this.af.getSampleRateKHz());
238
239                        return new Polygon( l );
240                }
241        }
242
243        /**
244         *      The calculation of how many milliseconds are in the last generated
245         *      view at the resampled overview.
246         */
247        public long millisecondsInView = 0;
248
249        /** The number of samples that were originally read in from the data */
250        public long numberOfProcessedSamples = 0;
251
252        /** The start time in milliseconds */
253        private long start = 0;
254
255        /** The length of the audio data */
256        private long length = 1000;
257
258        /** The overview generator */
259        private AudioOverviewGenerator aap = null;
260
261        /** Number of samples per pixel */
262        private int nSamplesPerPixel = 500;
263
264        /** Whether the generation is complete */
265        private boolean generationComplete = false;
266
267        /**
268         *      Default constructor
269         *      @param as The audio data to plot
270         */
271        public AudioOverviewVisualisation( final AudioStream as )
272        {
273                this.data  = as;
274                this.length = this.data.getLength();
275
276            // How many pixels we'll overview per pixel
277            this.nSamplesPerPixel  = 500;
278            // TODO: This is currently fixed-size but should be based on audio length
279
280            // Generate the audio overview
281                this.aap = new AudioOverviewGenerator(
282                                this.nSamplesPerPixel, this.data.getFormat().getNumChannels() );
283
284                new Thread( new Runnable()
285                {
286                        @Override
287                        public void run()
288                        {
289                            try
290                                {
291                                synchronized( AudioOverviewVisualisation.this.aap )
292                                        {
293                                                AudioOverviewVisualisation.this.aap.process( AudioOverviewVisualisation.this.data );
294                                                AudioOverviewVisualisation.this.generationComplete = true;
295                                                AudioOverviewVisualisation.this.aap.notifyAll();
296                                        }
297                                }
298                                catch( final Exception e )
299                                {
300                                        e.printStackTrace();
301                                        AudioOverviewVisualisation.this.aap = null;
302                                }
303                        }
304                } ).start();
305
306            this.setPreferredSize( new Dimension( -1, 100 ) );
307        }
308
309        /**
310         *      Generates a waveform image that fits within the given width and height
311         *      and drawn in the given colour. Note that the generated image is RGBA
312         *      so that the colours need to be 4 dimensions and may stipulate
313         *      transparency.
314         *
315         *      @param a The audio to draw
316         *      @param w The width of the image to return
317         *      @param h The height of the image to return
318         *      @param backgroundColour The background colour to draw on the image
319         *  @param colour The colour in which to draw the audio waveform.
320         *  @return The input image.
321         */
322        public static MBFImage getAudioWaveformImage( final AudioStream a,
323                        final int w, final int h, final Float[] backgroundColour,
324                        final Float[] colour  )
325    {
326                return new AudioOverviewVisualisation(a).plotAudioWaveformImage(
327                                w, h, backgroundColour, colour );
328    }
329
330        /**
331         *      Generates a waveform image that fits within the given width and height
332         *      and drawn in the given colour. Note that the generated image is RGBA
333         *      so that the colours need to be 4 dimensions and may stipulate
334         *      transparency.
335         *      <p>
336         *      If you require information about the plot afterwards you can check
337         *      the fields that are stored within this instance.
338         *
339         *      @param w The width of the image to return
340         *      @param h The height of the image to return
341         *      @param backgroundColour The background colour to draw on the image
342         *  @param colour The colour in which to draw the audio waveform.
343         *  @return The input image.
344         */
345        public MBFImage plotAudioWaveformImage(
346                        final int w, final int h, final Float[] backgroundColour,
347                        final Float[] colour  )
348        {
349                // Check if the overview's been generated, if not return empty image
350                if( this.aap == null )
351                {
352                        this.visImage.drawText( "Processing...", 20, 20, HersheyFont.TIMES_BOLD, 12, RGBColour.WHITE );
353                        return this.visImage;
354                }
355
356                // If the generation isn't complete (and aap is not null) it means
357                // we're processing the overview. Wait until it's finished.
358                while( !this.generationComplete )
359                {
360                        synchronized( this.aap )
361                        {
362                                try
363                                {
364                                        this.aap.wait();
365                                }
366                                catch( final InterruptedException e )
367                                {
368                                        e.printStackTrace();
369                                }
370                        }
371                }
372
373            // Work out how high each channel will be
374            final double channelSize = h/(double)this.data.getFormat().getNumChannels();
375
376            // This is the scalar from audio amplitude to pixels
377            final double ampScalar = channelSize / Integer.MAX_VALUE;
378
379            // Create the image we're going to draw on to - RGBA
380//          final MBFImage m = new MBFImage( w, h, 4 );
381            final MBFImageRenderer renderer = this.visImage.createRenderer();
382            this.visImage.fill( backgroundColour );
383
384            try
385        {
386                // Draw the polygon onto the image
387                final float ww = 1;
388                for( int i = 0; i < this.data.getFormat().getNumChannels(); i++ )
389                {
390                        final Polygon p = this.aap.getChannelPolygon( i, true, w );
391                        p.scaleXY( ww, (float)-ampScalar/2f );
392                        p.translate( 0f, (float)(-p.minY() + channelSize*i) );
393                        renderer.drawPolygonFilled( p, colour );
394                }
395        }
396        catch( final Exception e )
397        {
398                System.err.println( "WARNING: Could not process audio " +
399                                "to generate the audio overview.");
400                e.printStackTrace();
401        }
402
403                return this.visImage;
404    }
405
406        /**
407         *      Returns the length of the audio data in milliseconds.
408         *      Only returns the correct value after processing. Until then, it will
409         *      return 1 second.
410         *      @return Length of the audio data.
411         */
412        public long getLength()
413        {
414                return this.length;
415        }
416
417        /**
418         *      {@inheritDoc}
419         *      @see org.openimaj.vis.timeline.TimelineObjectAdapter#getStartTimeMilliseconds()
420         */
421        @Override
422        public long getStartTimeMilliseconds()
423        {
424                return this.start;
425        }
426
427        /**
428         *      {@inheritDoc}
429         *      @see org.openimaj.vis.timeline.TimelineObjectAdapter#getEndTimeMilliseconds()
430         */
431        @Override
432        public long getEndTimeMilliseconds()
433        {
434                return this.start + this.getLength();
435        }
436
437        /**
438         *      {@inheritDoc}
439         *      @see org.openimaj.vis.VisualisationImpl#update()
440         */
441        @Override
442        public void update()
443        {
444                if( this.visImage == null )
445                                this.plotAudioWaveformImage(
446                                        this.visImage.getWidth(), this.visImage.getHeight(),
447                                        new Float[]{1f,1f,0f,1f}, new Float[]{0f,0f,0f,1f} );
448        }
449
450        /**
451         *      {@inheritDoc}
452         *      @see org.openimaj.vis.timeline.TimelineObject#setStartTimeMilliseconds(long)
453         */
454        @Override
455        public void setStartTimeMilliseconds( final long l )
456        {
457                this.start = l;
458        }
459
460        /**
461         *      {@inheritDoc}
462         *      @see org.openimaj.vis.timeline.TimelineObject#setDataPixelTransformer(org.openimaj.vis.DataUnitsTransformer)
463         */
464        @Override
465        public void setDataPixelTransformer( final DataUnitsTransformer<Float[],double[],int[]> dpt )
466        {
467        }
468}