/**
 * Copyright (c) 2011, The University of Southampton and the individual contributors.
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without modification,
 * are permitted provided that the following conditions are met:
 *
 *   * 	Redistributions of source code must retain the above copyright notice,
 * 	this list of conditions and the following disclaimer.
 *
 *   *	Redistributions in binary form must reproduce the above copyright notice,
 * 	this list of conditions and the following disclaimer in the documentation
 * 	and/or other materials provided with the distribution.
 *
 *   *	Neither the name of the University of Southampton nor the names of its
 * 	contributors may be used to endorse or promote products derived from this
 * 	software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
 * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
 * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
 * ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */
package org.openimaj.vis.audio;

import gnu.trove.list.array.TFloatArrayList;

import java.awt.Dimension;
import java.util.ArrayList;

import org.openimaj.audio.AudioFormat;
import org.openimaj.audio.AudioStream;
import org.openimaj.audio.SampleChunk;
import org.openimaj.audio.processor.AudioProcessor;
import org.openimaj.audio.samples.SampleBuffer;
import org.openimaj.image.MBFImage;
import org.openimaj.image.colour.RGBColour;
import org.openimaj.image.renderer.MBFImageRenderer;
import org.openimaj.image.typography.hershey.HersheyFont;
import org.openimaj.math.geometry.point.Point2d;
import org.openimaj.math.geometry.point.Point2dImpl;
import org.openimaj.math.geometry.shape.Polygon;
import org.openimaj.vis.DataUnitsTransformer;
import org.openimaj.vis.VisualisationImpl;
import org.openimaj.vis.timeline.TimelineObject;
import org.openimaj.vis.timeline.TimelineObjectAdapter;

/**
 *	Utilises an audio processor to plot the audio waveform to an image. This class
 *	is both a {@link VisualisationImpl} and a {@link TimelineObject}. This means that
 *	it can be used to plot a complete visualisation of the overview of the data
 *	or it can be used to plot temporal parts of the data into the visualisation window.
 *	<p>
 *	An internal class ({@link AudioOverviewGenerator}) is used to generate the
 *	overview. The overview is generated on a background thread that is started
 *	by the constructor; {@link #plotAudioWaveformImage(int, int, Float[], Float[])}
 *	blocks until that thread has finished.
 *	<p>
 *	Because this class implements {@link TimelineObject}, an audio waveform can
 *	be put upon a timeline.
 *
 *	@author David Dupplaw (dpd@ecs.soton.ac.uk)
 *
 *	@created 9 Jun 2011
 */
public class AudioOverviewVisualisation extends VisualisationImpl<AudioStream>
	implements TimelineObject
{
	/** */
	private static final long serialVersionUID = 1L;

	/**
	 *	Generates an audio overview. This is a lower-resolution version of
	 *	the audio waveform. It takes the maximum value from a set of
	 *	values and stores this as the overview. The number of samples that
	 *	are collapsed into one overview value is given to the constructor
	 *	(the enclosing visualisation uses 500). The method
	 *	{@link #getAudioOverview(int, int)} allows resampling of that
	 *	overview.
	 *
	 *	@author David Dupplaw (dpd@ecs.soton.ac.uk)
	 *	@created 21 Jul 2011
	 */
	public class AudioOverviewGenerator extends AudioProcessor
	{
		/** The number of samples that are collapsed into each overview bin */
		private int nSamplesPerBin = -1;

		/** The maximum in the current bin for each channel */
		private float[] channelMax = null;

		/** The number of samples so far in the current bin being processed */
		private int nSamplesInBin = 0;

		/** The overview data; one list of bin maxima per channel */
		private TFloatArrayList[] audioOverview = null;

		/** The number of channels in the audio data */
		private int nChannels = 0;

		/** The audio format of the samples we're processing */
		private AudioFormat af = null;

		/**
		 *	Constructor
		 *
		 *	@param nSamplesPerBin The number of samples per bin
		 *	@param nChannels The number of channels
		 */
		public AudioOverviewGenerator( final int nSamplesPerBin, final int nChannels )
		{
			this.nSamplesPerBin = nSamplesPerBin;
			this.nChannels = nChannels;
			this.audioOverview = new TFloatArrayList[nChannels];
			this.channelMax = new float[nChannels];

			for( int i = 0; i < nChannels; i++ )
			{
				this.audioOverview[i] = new TFloatArrayList();

				// Start from the same sentinel that is used when a bin is
				// reset, so the first bin is treated like every other bin.
				this.channelMax[i] = Integer.MIN_VALUE;
			}
		}

		/**
		 *	Folds the given chunk into the overview. Every
		 *	<code>nSamplesPerBin</code> samples the per-channel maximum is
		 *	appended to the overview and the running maximum is reset. A
		 *	trailing, partially filled bin is carried over to the next chunk.
		 *	The enclosing visualisation's count of processed samples is
		 *	increased by the number of (per-channel) samples in the chunk.
		 *
		 *	@param samples The chunk to process
		 *	@return The unaltered input chunk
		 *	@see org.openimaj.audio.processor.AudioProcessor#process(org.openimaj.audio.SampleChunk)
		 */
		@Override
		public SampleChunk process( final SampleChunk samples )
		{
			// Store the format of the data (taken from the first chunk seen)
			if( this.af == null )
				this.af = samples.getFormat();

			// Get the sample data
			final SampleBuffer b = samples.getSampleBuffer();

			// The number of samples (per channel) in this sample chunk
			final int nSamples = b.size() / this.af.getNumChannels();

			// Keep a running total of how many samples we've processed
			AudioOverviewVisualisation.this.numberOfProcessedSamples += nSamples;

			for( int x = 0; x < nSamples; x++ )
			{
				// Samples are read as interleaved: sample x of channel c
				// is at index x*nChannels+c.
				for( int c = 0; c < this.nChannels; c++ )
				{
					this.channelMax[c] = Math.max( this.channelMax[c],
							b.get( x * this.nChannels + c ) );
				}

				// Count the sample first and then test, so that a bin holds
				// exactly nSamplesPerBin samples (not nSamplesPerBin + 1).
				this.nSamplesInBin++;
				if( this.nSamplesInBin >= this.nSamplesPerBin )
				{
					for( int c = 0; c < this.nChannels; c++ )
					{
						// Store the current bin and reset its maximum
						this.audioOverview[c].add( this.channelMax[c] );
						this.channelMax[c] = Integer.MIN_VALUE;
					}

					// Reset for the next bin
					this.nSamplesInBin = 0;
				}
			}

			return samples;
		}

		/**
		 *	@return The overview data; the internal lists are returned, not copies.
		 */
		public TFloatArrayList[] getAudioOverview()
		{
			return this.audioOverview;
		}

		/**
		 *	Resamples the overview of a channel into the given number of bins
		 *	by taking the maximum of the overview values that fall into each
		 *	new bin. If the number of bins requested is equal to or greater
		 *	than the size of the actual overview, the original (internal)
		 *	overview list is returned. The result therefore never has more
		 *	than <code>nBins</code> entries unless the original is returned.
		 *
		 *	@param channel The channel to get
		 *	@param nBins The number of bins in the overview
		 *	@return A resampled overview
		 */
		public TFloatArrayList getAudioOverview( final int channel, final int nBins )
		{
			final TFloatArrayList original = this.audioOverview[channel];
			if( nBins >= original.size() )
				return original;

			final TFloatArrayList resampled = new TFloatArrayList();

			// Number of original bins per output bin (greater than 1 here)
			final double scalar = (double)original.size() / (double)nBins;
			for( int xx = 0; xx < nBins; xx++ )
			{
				final int startBin = (int)(xx * scalar);
				final int endBin = (int)((xx + 1) * scalar);

				float m = Integer.MIN_VALUE;
				for( int yy = startBin; yy < endBin; yy++ )
					m = Math.max( m, original.get( yy ) );

				resampled.add( m );
			}

			return resampled;
		}

		/**
		 *	Returns a polygon representing the channel overview. The x
		 *	coordinates are spread over <code>width</code>; the y coordinates
		 *	are the raw overview values. As a side effect the enclosing
		 *	visualisation's <code>millisecondsInView</code> is updated from
		 *	the number of samples processed so far (if any audio has been
		 *	processed).
		 *
		 *	@param channel The channel to get the polygon for
		 *	@param mirror whether to mirror the polygon (append the negated
		 *		values in reverse order so that the shape is closed)
		 *	@param width The width of the overview to return
		 *	@return A polygon
		 */
		public Polygon getChannelPolygon( final int channel, final boolean mirror, final int width )
		{
			final TFloatArrayList overview = this.getAudioOverview( channel, width );
			final int len = overview.size();
			final double scalar = width / (double)len;

			final ArrayList<Point2d> l = new ArrayList<Point2d>();
			for( int x = 0; x < len; x++ )
				l.add( new Point2dImpl( (float)(x * scalar), overview.get( x ) ) );

			if( mirror )
			{
				// Walk back from the last value to the first, negated
				for( int x = 1; x <= len; x++ )
					l.add( new Point2dImpl( (float)((len - x) * scalar),
							-overview.get( len - x ) ) );
			}

			// Store how long the given overview is in milliseconds. The
			// format is only known once at least one chunk was processed.
			if( this.af != null )
			{
				AudioOverviewVisualisation.this.millisecondsInView =
						(long)(AudioOverviewVisualisation.this.numberOfProcessedSamples /
								this.af.getSampleRateKHz());
			}

			return new Polygon( l );
		}
	}

	/**
	 * 	The calculation of how many milliseconds are in the last generated
	 * 	view at the resampled overview.
	 */
	public long millisecondsInView = 0;

	/** The number of samples that were originally read in from the data */
	public long numberOfProcessedSamples = 0;

	/** The start time in milliseconds */
	private long start = 0;

	/** The length of the audio data; overwritten in the constructor */
	private long length = 1000;

	/**
	 * 	The overview generator. Set to null by the background thread if
	 * 	the generation fails, hence volatile.
	 */
	private volatile AudioOverviewGenerator aap = null;

	/** Number of samples per pixel */
	private int nSamplesPerPixel = 500;

	/**
	 * 	Whether the background generation has finished (successfully or
	 * 	not). Only read and written while holding the generator's monitor.
	 */
	private boolean generationComplete = false;

	/**
	 * 	Default constructor. Starts a background thread that runs the
	 * 	whole stream through the overview generator.
	 *
	 * 	@param as The audio data to plot
	 */
	public AudioOverviewVisualisation( final AudioStream as )
	{
		this.data = as;
		this.length = this.data.getLength();

		// How many samples we'll overview per pixel
		this.nSamplesPerPixel = 500;
		// TODO: This is currently fixed-size but should be based on audio length

		// Generate the audio overview
		final AudioOverviewGenerator generator = new AudioOverviewGenerator(
				this.nSamplesPerPixel, this.data.getFormat().getNumChannels() );
		this.aap = generator;

		new Thread( new Runnable()
		{
			@Override
			public void run()
			{
				// Use the local reference for locking: the field may be
				// nulled below and must not be used as the monitor.
				synchronized( generator )
				{
					try
					{
						generator.process( AudioOverviewVisualisation.this.data );
					}
					catch( final Exception e )
					{
						e.printStackTrace();
						AudioOverviewVisualisation.this.aap = null;
					}
					finally
					{
						// Always release waiters, even on failure, so
						// that nobody is left waiting forever.
						AudioOverviewVisualisation.this.generationComplete = true;
						generator.notifyAll();
					}
				}
			}
		} ).start();

		this.setPreferredSize( new Dimension( -1, 100 ) );
	}

	/**
	 * 	Generates a waveform image that fits within the given width and height
	 * 	and drawn in the given colour. Note that the generated image is RGBA
	 * 	so that the colours need to be 4 dimensions and may stipulate
	 * 	transparency.
	 *
	 * 	@param a The audio to draw
	 * 	@param w The width of the image to return
	 * 	@param h The height of the image to return
	 * 	@param backgroundColour The background colour to draw on the image
	 * 	@param colour The colour in which to draw the audio waveform.
	 * 	@return The visualisation image of a newly created visualisation.
	 */
	public static MBFImage getAudioWaveformImage( final AudioStream a,
			final int w, final int h, final Float[] backgroundColour,
			final Float[] colour )
	{
		return new AudioOverviewVisualisation( a ).plotAudioWaveformImage(
				w, h, backgroundColour, colour );
	}

	/**
	 * 	Generates a waveform image that fits within the given width and height
	 * 	and drawn in the given colour. Note that the generated image is RGBA
	 * 	so that the colours need to be 4 dimensions and may stipulate
	 * 	transparency.
	 * 	<p>
	 * 	This method blocks until the background overview generation has
	 * 	finished. If the generation failed, a "Processing..." message is
	 * 	drawn instead of the waveform.
	 * 	<p>
	 * 	If you require information about the plot afterwards you can check
	 * 	the fields that are stored within this instance.
	 *
	 * 	@param w The width of the image to return
	 * 	@param h The height of the image to return
	 * 	@param backgroundColour The background colour to draw on the image
	 * 	@param colour The colour in which to draw the audio waveform.
	 * 	@return The visualisation image of this instance.
	 */
	public MBFImage plotAudioWaveformImage(
			final int w, final int h, final Float[] backgroundColour,
			final Float[] colour )
	{
		// Check if the overview generator is available, if not return
		// the image with a message on it
		final AudioOverviewGenerator generator = this.aap;
		if( generator == null )
			return this.drawProcessingMessage();

		// We may still be processing the overview. Wait until it's finished.
		this.waitForGeneration( generator );

		// The generation may have failed while we were waiting
		if( this.aap == null )
			return this.drawProcessingMessage();

		final int nChannels = this.data.getFormat().getNumChannels();

		// Work out how high each channel will be
		final double channelSize = h / (double)nChannels;

		// This is the scalar from audio amplitude to pixels
		final double ampScalar = channelSize / Integer.MAX_VALUE;

		// Clear the image we're going to draw on to
		final MBFImageRenderer renderer = this.visImage.createRenderer();
		this.visImage.fill( backgroundColour );

		try
		{
			// Draw one mirrored polygon per channel onto the image
			final float ww = 1;
			for( int i = 0; i < nChannels; i++ )
			{
				final Polygon p = generator.getChannelPolygon( i, true, w );
				p.scaleXY( ww, (float)-ampScalar / 2f );

				// Move the polygon into the vertical band of its channel
				p.translate( 0f, (float)(-p.minY() + channelSize * i) );
				renderer.drawPolygonFilled( p, colour );
			}
		}
		catch( final Exception e )
		{
			System.err.println( "WARNING: Could not process audio " +
					"to generate the audio overview.");
			e.printStackTrace();
		}

		return this.visImage;
	}

	/**
	 * 	Draws the placeholder message onto the visualisation image.
	 *
	 * 	@return The visualisation image
	 */
	private MBFImage drawProcessingMessage()
	{
		this.visImage.drawText( "Processing...", 20, 20, HersheyFont.TIMES_BOLD, 12, RGBColour.WHITE );
		return this.visImage;
	}

	/**
	 * 	Blocks until the background generation has finished. The completion
	 * 	flag is tested while holding the generator's monitor so that a
	 * 	notification sent before this method is entered cannot be missed.
	 * 	If the calling thread is interrupted the wait continues and the
	 * 	interrupt status is restored before returning.
	 *
	 * 	@param generator The generator whose monitor is used for signalling
	 */
	private void waitForGeneration( final AudioOverviewGenerator generator )
	{
		boolean interrupted = false;

		synchronized( generator )
		{
			while( !this.generationComplete )
			{
				try
				{
					generator.wait();
				}
				catch( final InterruptedException e )
				{
					// Remember the interrupt; keep waiting for the overview
					interrupted = true;
				}
			}
		}

		if( interrupted )
			Thread.currentThread().interrupt();
	}

	/**
	 * 	Returns the length of the audio data as it was reported by the
	 * 	audio stream when this visualisation was constructed
	 * 	(presumably in milliseconds; verify against the stream implementation).
	 *
	 *	@return Length of the audio data.
	 */
	public long getLength()
	{
		return this.length;
	}

	/**
	 *	{@inheritDoc}
	 * 	@see org.openimaj.vis.timeline.TimelineObjectAdapter#getStartTimeMilliseconds()
	 */
	@Override
	public long getStartTimeMilliseconds()
	{
		return this.start;
	}

	/**
	 *	{@inheritDoc}
	 * 	@see org.openimaj.vis.timeline.TimelineObjectAdapter#getEndTimeMilliseconds()
	 */
	@Override
	public long getEndTimeMilliseconds()
	{
		return this.start + this.getLength();
	}

	/**
	 * 	Redraws the waveform onto the visualisation image, if there is one.
	 * 	Nothing is drawn while no visualisation image exists because its
	 * 	dimensions are needed for plotting.
	 *
	 *	{@inheritDoc}
	 * 	@see org.openimaj.vis.VisualisationImpl#update()
	 */
	@Override
	public void update()
	{
		if( this.visImage != null )
			this.plotAudioWaveformImage(
				this.visImage.getWidth(), this.visImage.getHeight(),
				new Float[]{1f,1f,0f,1f}, new Float[]{0f,0f,0f,1f} );
	}

	/**
	 *	{@inheritDoc}
	 * 	@see org.openimaj.vis.timeline.TimelineObject#setStartTimeMilliseconds(long)
	 */
	@Override
	public void setStartTimeMilliseconds( final long l )
	{
		this.start = l;
	}

	/**
	 * 	The transformer is not used by this visualisation; it is ignored.
	 *
	 *	{@inheritDoc}
	 * 	@see org.openimaj.vis.timeline.TimelineObject#setDataPixelTransformer(org.openimaj.vis.DataUnitsTransformer)
	 */
	@Override
	public void setDataPixelTransformer( final DataUnitsTransformer<Float[],double[],int[]> dpt )
	{
		// Intentionally empty
	}
}