libspeexenc.c
Go to the documentation of this file.
1 /*
2  * Copyright (C) 2009 Justin Ruggles
3  * Copyright (c) 2009 Xuggle Incorporated
4  *
5  * This file is part of Libav.
6  *
7  * Libav is free software; you can redistribute it and/or
8  * modify it under the terms of the GNU Lesser General Public
9  * License as published by the Free Software Foundation; either
10  * version 2.1 of the License, or (at your option) any later version.
11  *
12  * Libav is distributed in the hope that it will be useful,
13  * but WITHOUT ANY WARRANTY; without even the implied warranty of
14  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15  * Lesser General Public License for more details.
16  *
17  * You should have received a copy of the GNU Lesser General Public
18  * License along with Libav; if not, write to the Free Software
19  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
20  */
21 
#include <string.h>

#include <speex/speex.h>
#include <speex/speex_header.h>
#include <speex/speex_stereo.h>

#include "libavutil/channel_layout.h"
#include "libavutil/common.h"
#include "libavutil/opt.h"
#include "avcodec.h"
#include "internal.h"
#include "audio_frame_queue.h"
94 
95 typedef struct {
96  AVClass *class;
97  SpeexBits bits;
98  SpeexHeader header;
99  void *enc_state;
101  float vbr_quality;
103  int abr;
104  int vad;
105  int dtx;
109 
112 {
113  const char *mode_str = "unknown";
114 
115  av_log(avctx, AV_LOG_DEBUG, "channels: %d\n", avctx->channels);
116  switch (s->header.mode) {
117  case SPEEX_MODEID_NB: mode_str = "narrowband"; break;
118  case SPEEX_MODEID_WB: mode_str = "wideband"; break;
119  case SPEEX_MODEID_UWB: mode_str = "ultra-wideband"; break;
120  }
121  av_log(avctx, AV_LOG_DEBUG, "mode: %s\n", mode_str);
122  if (s->header.vbr) {
123  av_log(avctx, AV_LOG_DEBUG, "rate control: VBR\n");
124  av_log(avctx, AV_LOG_DEBUG, " quality: %f\n", s->vbr_quality);
125  } else if (s->abr) {
126  av_log(avctx, AV_LOG_DEBUG, "rate control: ABR\n");
127  av_log(avctx, AV_LOG_DEBUG, " bitrate: %d bps\n", avctx->bit_rate);
128  } else {
129  av_log(avctx, AV_LOG_DEBUG, "rate control: CBR\n");
130  av_log(avctx, AV_LOG_DEBUG, " bitrate: %d bps\n", avctx->bit_rate);
131  }
132  av_log(avctx, AV_LOG_DEBUG, "complexity: %d\n",
133  avctx->compression_level);
134  av_log(avctx, AV_LOG_DEBUG, "frame size: %d samples\n",
135  avctx->frame_size);
136  av_log(avctx, AV_LOG_DEBUG, "frames per packet: %d\n",
137  s->frames_per_packet);
138  av_log(avctx, AV_LOG_DEBUG, "packet size: %d\n",
139  avctx->frame_size * s->frames_per_packet);
140  av_log(avctx, AV_LOG_DEBUG, "voice activity detection: %d\n", s->vad);
141  av_log(avctx, AV_LOG_DEBUG, "discontinuous transmission: %d\n", s->dtx);
142 }
143 
145 {
146  LibSpeexEncContext *s = avctx->priv_data;
147  const SpeexMode *mode;
148  uint8_t *header_data;
149  int header_size;
150  int32_t complexity;
151 
152  /* channels */
153  if (avctx->channels < 1 || avctx->channels > 2) {
154  av_log(avctx, AV_LOG_ERROR, "Invalid channels (%d). Only stereo and "
155  "mono are supported\n", avctx->channels);
156  return AVERROR(EINVAL);
157  }
158 
159  /* sample rate and encoding mode */
160  switch (avctx->sample_rate) {
161  case 8000: mode = &speex_nb_mode; break;
162  case 16000: mode = &speex_wb_mode; break;
163  case 32000: mode = &speex_uwb_mode; break;
164  default:
165  av_log(avctx, AV_LOG_ERROR, "Sample rate of %d Hz is not supported. "
166  "Resample to 8, 16, or 32 kHz.\n", avctx->sample_rate);
167  return AVERROR(EINVAL);
168  }
169 
170  /* initialize libspeex */
171  s->enc_state = speex_encoder_init(mode);
172  if (!s->enc_state) {
173  av_log(avctx, AV_LOG_ERROR, "Error initializing libspeex\n");
174  return -1;
175  }
176  speex_init_header(&s->header, avctx->sample_rate, avctx->channels, mode);
177 
178  /* rate control method and parameters */
179  if (avctx->flags & CODEC_FLAG_QSCALE) {
180  /* VBR */
181  s->header.vbr = 1;
182  s->vad = 1; /* VAD is always implicitly activated for VBR */
183  speex_encoder_ctl(s->enc_state, SPEEX_SET_VBR, &s->header.vbr);
184  s->vbr_quality = av_clipf(avctx->global_quality / (float)FF_QP2LAMBDA,
185  0.0f, 10.0f);
186  speex_encoder_ctl(s->enc_state, SPEEX_SET_VBR_QUALITY, &s->vbr_quality);
187  } else {
188  s->header.bitrate = avctx->bit_rate;
189  if (avctx->bit_rate > 0) {
190  /* CBR or ABR by bitrate */
191  if (s->abr) {
192  speex_encoder_ctl(s->enc_state, SPEEX_SET_ABR,
193  &s->header.bitrate);
194  speex_encoder_ctl(s->enc_state, SPEEX_GET_ABR,
195  &s->header.bitrate);
196  } else {
197  speex_encoder_ctl(s->enc_state, SPEEX_SET_BITRATE,
198  &s->header.bitrate);
199  speex_encoder_ctl(s->enc_state, SPEEX_GET_BITRATE,
200  &s->header.bitrate);
201  }
202  } else {
203  /* CBR by quality */
204  speex_encoder_ctl(s->enc_state, SPEEX_SET_QUALITY,
205  &s->cbr_quality);
206  speex_encoder_ctl(s->enc_state, SPEEX_GET_BITRATE,
207  &s->header.bitrate);
208  }
209  /* stereo side information adds about 800 bps to the base bitrate */
210  /* TODO: this should be calculated exactly */
211  avctx->bit_rate = s->header.bitrate + (avctx->channels == 2 ? 800 : 0);
212  }
213 
214  /* VAD is activated with VBR or can be turned on by itself */
215  if (s->vad)
216  speex_encoder_ctl(s->enc_state, SPEEX_SET_VAD, &s->vad);
217 
218  /* Activiting Discontinuous Transmission */
219  if (s->dtx) {
220  speex_encoder_ctl(s->enc_state, SPEEX_SET_DTX, &s->dtx);
221  if (!(s->abr || s->vad || s->header.vbr))
222  av_log(avctx, AV_LOG_WARNING, "DTX is not much of use without ABR, VAD or VBR\n");
223  }
224 
225  /* set encoding complexity */
227  complexity = av_clip(avctx->compression_level, 0, 10);
228  speex_encoder_ctl(s->enc_state, SPEEX_SET_COMPLEXITY, &complexity);
229  }
230  speex_encoder_ctl(s->enc_state, SPEEX_GET_COMPLEXITY, &complexity);
231  avctx->compression_level = complexity;
232 
233  /* set packet size */
234  avctx->frame_size = s->header.frame_size;
235  s->header.frames_per_packet = s->frames_per_packet;
236 
237  /* set encoding delay */
238  speex_encoder_ctl(s->enc_state, SPEEX_GET_LOOKAHEAD, &avctx->delay);
239  ff_af_queue_init(avctx, &s->afq);
240 
241  /* create header packet bytes from header struct */
242  /* note: libspeex allocates the memory for header_data, which is freed
243  below with speex_header_free() */
244  header_data = speex_header_to_packet(&s->header, &header_size);
245 
246  /* allocate extradata and coded_frame */
247  avctx->extradata = av_malloc(header_size + FF_INPUT_BUFFER_PADDING_SIZE);
248  if (!avctx->extradata) {
249  speex_header_free(header_data);
250  speex_encoder_destroy(s->enc_state);
251  av_log(avctx, AV_LOG_ERROR, "memory allocation error\n");
252  return AVERROR(ENOMEM);
253  }
254 #if FF_API_OLD_ENCODE_AUDIO
255  avctx->coded_frame = avcodec_alloc_frame();
256  if (!avctx->coded_frame) {
257  av_freep(&avctx->extradata);
258  speex_header_free(header_data);
259  speex_encoder_destroy(s->enc_state);
260  av_log(avctx, AV_LOG_ERROR, "memory allocation error\n");
261  return AVERROR(ENOMEM);
262  }
263 #endif
264 
265  /* copy header packet to extradata */
266  memcpy(avctx->extradata, header_data, header_size);
267  avctx->extradata_size = header_size;
268  speex_header_free(header_data);
269 
270  /* init libspeex bitwriter */
271  speex_bits_init(&s->bits);
272 
273  print_enc_params(avctx, s);
274  return 0;
275 }
276 
277 static int encode_frame(AVCodecContext *avctx, AVPacket *avpkt,
278  const AVFrame *frame, int *got_packet_ptr)
279 {
280  LibSpeexEncContext *s = avctx->priv_data;
281  int16_t *samples = frame ? (int16_t *)frame->data[0] : NULL;
282  int ret;
283 
284  if (samples) {
285  /* encode Speex frame */
286  if (avctx->channels == 2)
287  speex_encode_stereo_int(samples, s->header.frame_size, &s->bits);
288  speex_encode_int(s->enc_state, samples, &s->bits);
289  s->pkt_frame_count++;
290  if ((ret = ff_af_queue_add(&s->afq, frame)) < 0)
291  return ret;
292  } else {
293  /* handle end-of-stream */
294  if (!s->pkt_frame_count)
295  return 0;
296  /* add extra terminator codes for unused frames in last packet */
297  while (s->pkt_frame_count < s->frames_per_packet) {
298  speex_bits_pack(&s->bits, 15, 5);
299  s->pkt_frame_count++;
300  }
301  }
302 
303  /* write output if all frames for the packet have been encoded */
304  if (s->pkt_frame_count == s->frames_per_packet) {
305  s->pkt_frame_count = 0;
306  if ((ret = ff_alloc_packet(avpkt, speex_bits_nbytes(&s->bits)))) {
307  av_log(avctx, AV_LOG_ERROR, "Error getting output packet\n");
308  return ret;
309  }
310  ret = speex_bits_write(&s->bits, avpkt->data, avpkt->size);
311  speex_bits_reset(&s->bits);
312 
313  /* Get the next frame pts/duration */
315  &avpkt->pts, &avpkt->duration);
316 
317  avpkt->size = ret;
318  *got_packet_ptr = 1;
319  return 0;
320  }
321  return 0;
322 }
323 
325 {
326  LibSpeexEncContext *s = avctx->priv_data;
327 
328  speex_bits_destroy(&s->bits);
329  speex_encoder_destroy(s->enc_state);
330 
331  ff_af_queue_close(&s->afq);
332 #if FF_API_OLD_ENCODE_AUDIO
333  av_freep(&avctx->coded_frame);
334 #endif
335  av_freep(&avctx->extradata);
336 
337  return 0;
338 }
339 
340 #define OFFSET(x) offsetof(LibSpeexEncContext, x)
341 #define AE AV_OPT_FLAG_AUDIO_PARAM | AV_OPT_FLAG_ENCODING_PARAM
342 static const AVOption options[] = {
343  { "abr", "Use average bit rate", OFFSET(abr), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 1, AE },
344  { "cbr_quality", "Set quality value (0 to 10) for CBR", OFFSET(cbr_quality), AV_OPT_TYPE_INT, { .i64 = 8 }, 0, 10, AE },
345  { "frames_per_packet", "Number of frames to encode in each packet", OFFSET(frames_per_packet), AV_OPT_TYPE_INT, { .i64 = 1 }, 1, 8, AE },
346  { "vad", "Voice Activity Detection", OFFSET(vad), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 1, AE },
347  { "dtx", "Discontinuous Transmission", OFFSET(dtx), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 1, AE },
348  { NULL },
349 };
350 
351 static const AVClass class = {
352  .class_name = "libspeex",
353  .item_name = av_default_item_name,
354  .option = options,
356 };
357 
358 static const AVCodecDefault defaults[] = {
359  { "b", "0" },
360  { "compression_level", "3" },
361  { NULL },
362 };
363 
365  .name = "libspeex",
366  .type = AVMEDIA_TYPE_AUDIO,
367  .id = AV_CODEC_ID_SPEEX,
368  .priv_data_size = sizeof(LibSpeexEncContext),
369  .init = encode_init,
370  .encode2 = encode_frame,
371  .close = encode_close,
372  .capabilities = CODEC_CAP_DELAY,
373  .sample_fmts = (const enum AVSampleFormat[]){ AV_SAMPLE_FMT_S16,
375  .channel_layouts = (const uint64_t[]){ AV_CH_LAYOUT_MONO,
377  0 },
378  .supported_samplerates = (const int[]){ 8000, 16000, 32000, 0 },
379  .long_name = NULL_IF_CONFIG_SMALL("libspeex Speex"),
380  .priv_class = &class,
381  .defaults = defaults,
382 };
int pkt_frame_count
frame count for the current packet
Definition: libspeexenc.c:106
void * av_malloc(size_t size)
Allocate a block of size bytes with alignment suitable for all memory accesses (including vectors if ...
Definition: mem.c:61
static int16_t * samples
static av_cold int encode_init(AVCodecContext *avctx)
Definition: libspeexenc.c:144
This structure describes decoded (raw) audio or video data.
Definition: avcodec.h:989
AVOption.
Definition: opt.h:233
static int encode_frame(AVCodecContext *avctx, AVPacket *avpkt, const AVFrame *frame, int *got_packet_ptr)
Definition: libspeexenc.c:277
AVFrame * coded_frame
the picture in the bitstream
Definition: avcodec.h:2725
int size
Definition: avcodec.h:916
int cbr_quality
CBR quality 0 to 10.
Definition: libspeexenc.c:102
int vad
flag to enable VAD
Definition: libspeexenc.c:104
SpeexBits bits
libspeex bitwriter context
Definition: libspeexenc.c:97
static av_cold int encode_close(AVCodecContext *avctx)
Definition: libspeexenc.c:324
#define AV_CH_LAYOUT_STEREO
int dtx
flag to enable DTX
Definition: libspeexenc.c:105
signed 16 bits
Definition: samplefmt.h:52
AVCodec.
Definition: avcodec.h:2960
void av_freep(void *arg)
Free a memory block which has been allocated with av_malloc(z)() or av_realloc() and set the pointer ...
Definition: mem.c:151
const char * class_name
The name of the class; usually it is the same name as the context structure type to which the AVClass...
Definition: log.h:38
uint8_t
AVOptions.
AudioFrameQueue afq
frame queue
Definition: libspeexenc.c:107
uint8_t * extradata
some codecs need / can use extradata like Huffman tables.
Definition: avcodec.h:1454
AVCodec ff_libspeex_encoder
Definition: libspeexenc.c:364
uint8_t * data
Definition: avcodec.h:915
int duration
Duration of this packet in AVStream->time_base units, 0 if unknown.
Definition: avcodec.h:937
static int init(AVCodecParserContext *s)
Definition: h264_parser.c:335
int frames_per_packet
number of frames to encode in each packet
Definition: libspeexenc.c:100
static const AVCodecDefault defaults[]
Definition: libspeexenc.c:358
sample_fmts
Definition: avconv_filter.c:63
#define NULL_IF_CONFIG_SMALL(x)
Return NULL if CONFIG_SMALL is true, otherwise the argument without modification. ...
Definition: internal.h:88
int flags
CODEC_FLAG_*.
Definition: avcodec.h:1434
void av_log(void *avcl, int level, const char *fmt,...)
Definition: log.c:146
const char * name
Name of the codec implementation.
Definition: avcodec.h:2967
#define AE
Definition: libspeexenc.c:341
int ff_af_queue_add(AudioFrameQueue *afq, const AVFrame *f)
Add a frame to the queue.
AVFrame * avcodec_alloc_frame(void)
Allocate an AVFrame and set its fields to default values.
Definition: utils.c:616
int bit_rate
the average bitrate
Definition: avcodec.h:1404
audio channel layout utility functions
int32_t
int ff_alloc_packet(AVPacket *avpkt, int size)
Check AVPacket size and/or allocate data.
Definition: utils.c:878
LIBAVUTIL_VERSION_INT
Definition: eval.c:52
int frame_size
Number of samples per channel in an audio frame.
Definition: avcodec.h:2124
NULL
Definition: eval.c:52
void * enc_state
libspeex encoder state
Definition: libspeexenc.c:99
external API header
#define OFFSET(x)
Definition: libspeexenc.c:340
int compression_level
Definition: avcodec.h:1426
AV_SAMPLE_FMT_NONE
Definition: avconv_filter.c:63
int sample_rate
samples per second
Definition: avcodec.h:2104
av_default_item_name
Definition: dnxhdenc.c:43
main external API structure.
Definition: avcodec.h:1339
static void close(AVCodecParserContext *s)
Definition: h264_parser.c:326
int extradata_size
Definition: avcodec.h:1455
Describe the class of an AVClass context structure.
Definition: log.h:33
int global_quality
Global quality for codecs which cannot change it per frame.
Definition: avcodec.h:1420
uint8_t * data[AV_NUM_DATA_POINTERS]
pointer to the picture/channel planes.
Definition: avcodec.h:997
common internal api header.
common internal and external API header
static av_cold void print_enc_params(AVCodecContext *avctx, LibSpeexEncContext *s)
Definition: libspeexenc.c:110
AVSampleFormat
Audio Sample Formats.
Definition: samplefmt.h:49
SpeexHeader header
libspeex header struct
Definition: libspeexenc.c:98
static const AVOption options[]
Definition: libspeexenc.c:342
void * priv_data
Definition: avcodec.h:1382
void ff_af_queue_init(AVCodecContext *avctx, AudioFrameQueue *afq)
Initialize AudioFrameQueue.
void ff_af_queue_remove(AudioFrameQueue *afq, int nb_samples, int64_t *pts, int *duration)
Remove frame(s) from the queue.
int channels
number of audio channels
Definition: avcodec.h:2105
int abr
flag to enable ABR
Definition: libspeexenc.c:103
void ff_af_queue_close(AudioFrameQueue *afq)
Close AudioFrameQueue.
float vbr_quality
VBR quality 0.0 to 10.0.
Definition: libspeexenc.c:101
#define AV_CH_LAYOUT_MONO
This structure stores compressed data.
Definition: avcodec.h:898
int delay
Codec delay.
Definition: avcodec.h:1497
int64_t pts
Presentation timestamp in AVStream->time_base units; the time at which the decompressed packet will b...
Definition: avcodec.h:908
if(!(ptr_align%ac->ptr_align)&&samples_align >=aligned_len)