Libav
libspeexenc.c
Go to the documentation of this file.
1 /*
2  * Copyright (C) 2009 Justin Ruggles
3  * Copyright (c) 2009 Xuggle Incorporated
4  *
5  * This file is part of Libav.
6  *
7  * Libav is free software; you can redistribute it and/or
8  * modify it under the terms of the GNU Lesser General Public
9  * License as published by the Free Software Foundation; either
10  * version 2.1 of the License, or (at your option) any later version.
11  *
12  * Libav is distributed in the hope that it will be useful,
13  * but WITHOUT ANY WARRANTY; without even the implied warranty of
14  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15  * Lesser General Public License for more details.
16  *
17  * You should have received a copy of the GNU Lesser General Public
18  * License along with Libav; if not, write to the Free Software
19  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
20  */
21 
84 #include <speex/speex.h>
85 #include <speex/speex_header.h>
86 #include <speex/speex_stereo.h>
87 
89 #include "libavutil/common.h"
90 #include "libavutil/opt.h"
91 #include "avcodec.h"
92 #include "internal.h"
93 #include "audio_frame_queue.h"
94 
95 typedef struct {
96  AVClass *class;
97  SpeexBits bits;
98  SpeexHeader header;
99  void *enc_state;
101  float vbr_quality;
103  int abr;
104  int vad;
105  int dtx;
109 
112 {
113  const char *mode_str = "unknown";
114 
115  av_log(avctx, AV_LOG_DEBUG, "channels: %d\n", avctx->channels);
116  switch (s->header.mode) {
117  case SPEEX_MODEID_NB: mode_str = "narrowband"; break;
118  case SPEEX_MODEID_WB: mode_str = "wideband"; break;
119  case SPEEX_MODEID_UWB: mode_str = "ultra-wideband"; break;
120  }
121  av_log(avctx, AV_LOG_DEBUG, "mode: %s\n", mode_str);
122  if (s->header.vbr) {
123  av_log(avctx, AV_LOG_DEBUG, "rate control: VBR\n");
124  av_log(avctx, AV_LOG_DEBUG, " quality: %f\n", s->vbr_quality);
125  } else if (s->abr) {
126  av_log(avctx, AV_LOG_DEBUG, "rate control: ABR\n");
127  av_log(avctx, AV_LOG_DEBUG, " bitrate: %d bps\n", avctx->bit_rate);
128  } else {
129  av_log(avctx, AV_LOG_DEBUG, "rate control: CBR\n");
130  av_log(avctx, AV_LOG_DEBUG, " bitrate: %d bps\n", avctx->bit_rate);
131  }
132  av_log(avctx, AV_LOG_DEBUG, "complexity: %d\n",
133  avctx->compression_level);
134  av_log(avctx, AV_LOG_DEBUG, "frame size: %d samples\n",
135  avctx->frame_size);
136  av_log(avctx, AV_LOG_DEBUG, "frames per packet: %d\n",
137  s->frames_per_packet);
138  av_log(avctx, AV_LOG_DEBUG, "packet size: %d\n",
139  avctx->frame_size * s->frames_per_packet);
140  av_log(avctx, AV_LOG_DEBUG, "voice activity detection: %d\n", s->vad);
141  av_log(avctx, AV_LOG_DEBUG, "discontinuous transmission: %d\n", s->dtx);
142 }
143 
145 {
146  LibSpeexEncContext *s = avctx->priv_data;
147  const SpeexMode *mode;
148  uint8_t *header_data;
149  int header_size;
150  int32_t complexity;
151 
152  /* channels */
153  if (avctx->channels < 1 || avctx->channels > 2) {
154  av_log(avctx, AV_LOG_ERROR, "Invalid channels (%d). Only stereo and "
155  "mono are supported\n", avctx->channels);
156  return AVERROR(EINVAL);
157  }
158 
159  /* sample rate and encoding mode */
160  switch (avctx->sample_rate) {
161  case 8000: mode = &speex_nb_mode; break;
162  case 16000: mode = &speex_wb_mode; break;
163  case 32000: mode = &speex_uwb_mode; break;
164  default:
165  av_log(avctx, AV_LOG_ERROR, "Sample rate of %d Hz is not supported. "
166  "Resample to 8, 16, or 32 kHz.\n", avctx->sample_rate);
167  return AVERROR(EINVAL);
168  }
169 
170  /* initialize libspeex */
171  s->enc_state = speex_encoder_init(mode);
172  if (!s->enc_state) {
173  av_log(avctx, AV_LOG_ERROR, "Error initializing libspeex\n");
174  return -1;
175  }
176  speex_init_header(&s->header, avctx->sample_rate, avctx->channels, mode);
177 
178  /* rate control method and parameters */
179  if (avctx->flags & CODEC_FLAG_QSCALE) {
180  /* VBR */
181  s->header.vbr = 1;
182  s->vad = 1; /* VAD is always implicitly activated for VBR */
183  speex_encoder_ctl(s->enc_state, SPEEX_SET_VBR, &s->header.vbr);
184  s->vbr_quality = av_clipf(avctx->global_quality / (float)FF_QP2LAMBDA,
185  0.0f, 10.0f);
186  speex_encoder_ctl(s->enc_state, SPEEX_SET_VBR_QUALITY, &s->vbr_quality);
187  } else {
188  s->header.bitrate = avctx->bit_rate;
189  if (avctx->bit_rate > 0) {
190  /* CBR or ABR by bitrate */
191  if (s->abr) {
192  speex_encoder_ctl(s->enc_state, SPEEX_SET_ABR,
193  &s->header.bitrate);
194  speex_encoder_ctl(s->enc_state, SPEEX_GET_ABR,
195  &s->header.bitrate);
196  } else {
197  speex_encoder_ctl(s->enc_state, SPEEX_SET_BITRATE,
198  &s->header.bitrate);
199  speex_encoder_ctl(s->enc_state, SPEEX_GET_BITRATE,
200  &s->header.bitrate);
201  }
202  } else {
203  /* CBR by quality */
204  speex_encoder_ctl(s->enc_state, SPEEX_SET_QUALITY,
205  &s->cbr_quality);
206  speex_encoder_ctl(s->enc_state, SPEEX_GET_BITRATE,
207  &s->header.bitrate);
208  }
209  /* stereo side information adds about 800 bps to the base bitrate */
210  /* TODO: this should be calculated exactly */
211  avctx->bit_rate = s->header.bitrate + (avctx->channels == 2 ? 800 : 0);
212  }
213 
214  /* VAD is activated with VBR or can be turned on by itself */
215  if (s->vad)
216  speex_encoder_ctl(s->enc_state, SPEEX_SET_VAD, &s->vad);
217 
218  /* Activiting Discontinuous Transmission */
219  if (s->dtx) {
220  speex_encoder_ctl(s->enc_state, SPEEX_SET_DTX, &s->dtx);
221  if (!(s->abr || s->vad || s->header.vbr))
222  av_log(avctx, AV_LOG_WARNING, "DTX is not much of use without ABR, VAD or VBR\n");
223  }
224 
225  /* set encoding complexity */
227  complexity = av_clip(avctx->compression_level, 0, 10);
228  speex_encoder_ctl(s->enc_state, SPEEX_SET_COMPLEXITY, &complexity);
229  }
230  speex_encoder_ctl(s->enc_state, SPEEX_GET_COMPLEXITY, &complexity);
231  avctx->compression_level = complexity;
232 
233  /* set packet size */
234  avctx->frame_size = s->header.frame_size;
235  s->header.frames_per_packet = s->frames_per_packet;
236 
237  /* set encoding delay */
238  speex_encoder_ctl(s->enc_state, SPEEX_GET_LOOKAHEAD, &avctx->delay);
239  ff_af_queue_init(avctx, &s->afq);
240 
241  /* create header packet bytes from header struct */
242  /* note: libspeex allocates the memory for header_data, which is freed
243  below with speex_header_free() */
244  header_data = speex_header_to_packet(&s->header, &header_size);
245 
246  /* allocate extradata and coded_frame */
247  avctx->extradata = av_malloc(header_size + FF_INPUT_BUFFER_PADDING_SIZE);
248  if (!avctx->extradata) {
249  speex_header_free(header_data);
250  speex_encoder_destroy(s->enc_state);
251  av_log(avctx, AV_LOG_ERROR, "memory allocation error\n");
252  return AVERROR(ENOMEM);
253  }
254 
255  /* copy header packet to extradata */
256  memcpy(avctx->extradata, header_data, header_size);
257  avctx->extradata_size = header_size;
258  speex_header_free(header_data);
259 
260  /* init libspeex bitwriter */
261  speex_bits_init(&s->bits);
262 
263  print_enc_params(avctx, s);
264  return 0;
265 }
266 
267 static int encode_frame(AVCodecContext *avctx, AVPacket *avpkt,
268  const AVFrame *frame, int *got_packet_ptr)
269 {
270  LibSpeexEncContext *s = avctx->priv_data;
271  int16_t *samples = frame ? (int16_t *)frame->data[0] : NULL;
272  int ret;
273 
274  if (samples) {
275  /* encode Speex frame */
276  if (avctx->channels == 2)
277  speex_encode_stereo_int(samples, s->header.frame_size, &s->bits);
278  speex_encode_int(s->enc_state, samples, &s->bits);
279  s->pkt_frame_count++;
280  if ((ret = ff_af_queue_add(&s->afq, frame)) < 0)
281  return ret;
282  } else {
283  /* handle end-of-stream */
284  if (!s->pkt_frame_count)
285  return 0;
286  /* add extra terminator codes for unused frames in last packet */
287  while (s->pkt_frame_count < s->frames_per_packet) {
288  speex_bits_pack(&s->bits, 15, 5);
289  s->pkt_frame_count++;
290  }
291  }
292 
293  /* write output if all frames for the packet have been encoded */
294  if (s->pkt_frame_count == s->frames_per_packet) {
295  s->pkt_frame_count = 0;
296  if ((ret = ff_alloc_packet(avpkt, speex_bits_nbytes(&s->bits)))) {
297  av_log(avctx, AV_LOG_ERROR, "Error getting output packet\n");
298  return ret;
299  }
300  ret = speex_bits_write(&s->bits, avpkt->data, avpkt->size);
301  speex_bits_reset(&s->bits);
302 
303  /* Get the next frame pts/duration */
305  &avpkt->pts, &avpkt->duration);
306 
307  avpkt->size = ret;
308  *got_packet_ptr = 1;
309  return 0;
310  }
311  return 0;
312 }
313 
315 {
316  LibSpeexEncContext *s = avctx->priv_data;
317 
318  speex_bits_destroy(&s->bits);
319  speex_encoder_destroy(s->enc_state);
320 
321  ff_af_queue_close(&s->afq);
322  av_freep(&avctx->extradata);
323 
324  return 0;
325 }
326 
327 #define OFFSET(x) offsetof(LibSpeexEncContext, x)
328 #define AE AV_OPT_FLAG_AUDIO_PARAM | AV_OPT_FLAG_ENCODING_PARAM
329 static const AVOption options[] = {
330  { "abr", "Use average bit rate", OFFSET(abr), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 1, AE },
331  { "cbr_quality", "Set quality value (0 to 10) for CBR", OFFSET(cbr_quality), AV_OPT_TYPE_INT, { .i64 = 8 }, 0, 10, AE },
332  { "frames_per_packet", "Number of frames to encode in each packet", OFFSET(frames_per_packet), AV_OPT_TYPE_INT, { .i64 = 1 }, 1, 8, AE },
333  { "vad", "Voice Activity Detection", OFFSET(vad), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 1, AE },
334  { "dtx", "Discontinuous Transmission", OFFSET(dtx), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 1, AE },
335  { NULL },
336 };
337 
338 static const AVClass class = {
339  .class_name = "libspeex",
340  .item_name = av_default_item_name,
341  .option = options,
343 };
344 
345 static const AVCodecDefault defaults[] = {
346  { "b", "0" },
347  { "compression_level", "3" },
348  { NULL },
349 };
350 
352  .name = "libspeex",
353  .long_name = NULL_IF_CONFIG_SMALL("libspeex Speex"),
354  .type = AVMEDIA_TYPE_AUDIO,
355  .id = AV_CODEC_ID_SPEEX,
356  .priv_data_size = sizeof(LibSpeexEncContext),
357  .init = encode_init,
358  .encode2 = encode_frame,
359  .close = encode_close,
360  .capabilities = CODEC_CAP_DELAY,
361  .sample_fmts = (const enum AVSampleFormat[]){ AV_SAMPLE_FMT_S16,
363  .channel_layouts = (const uint64_t[]){ AV_CH_LAYOUT_MONO,
365  0 },
366  .supported_samplerates = (const int[]){ 8000, 16000, 32000, 0 },
367  .priv_class = &class,
368  .defaults = defaults,
369 };
int pkt_frame_count
frame count for the current packet
Definition: libspeexenc.c:106
void * av_malloc(size_t size)
Allocate a block of size bytes with alignment suitable for all memory accesses (including vectors if ...
Definition: mem.c:62
#define FF_COMPRESSION_DEFAULT
Definition: avcodec.h:1137
static av_cold int encode_init(AVCodecContext *avctx)
Definition: libspeexenc.c:144
This structure describes decoded (raw) audio or video data.
Definition: frame.h:135
AVOption.
Definition: opt.h:234
static int encode_frame(AVCodecContext *avctx, AVPacket *avpkt, const AVFrame *frame, int *got_packet_ptr)
Definition: libspeexenc.c:267
#define AV_LOG_WARNING
Something somehow does not look correct.
Definition: log.h:129
int size
Definition: avcodec.h:974
int cbr_quality
CBR quality 0 to 10.
Definition: libspeexenc.c:102
int vad
flag to enable VAD
Definition: libspeexenc.c:104
SpeexBits bits
libspeex bitwriter context
Definition: libspeexenc.c:97
static av_cold int encode_close(AVCodecContext *avctx)
Definition: libspeexenc.c:314
#define AV_CH_LAYOUT_STEREO
int dtx
flag to enable DTX
Definition: libspeexenc.c:105
AVCodec.
Definition: avcodec.h:2796
void av_freep(void *arg)
Free a memory block which has been allocated with av_malloc(z)() or av_realloc() and set the pointer ...
Definition: mem.c:198
const char * class_name
The name of the class; usually it is the same name as the context structure type to which the AVClass...
Definition: log.h:38
uint8_t
#define av_cold
Definition: attributes.h:66
AVOptions.
AudioFrameQueue afq
frame queue
Definition: libspeexenc.c:107
av_cold void ff_af_queue_init(AVCodecContext *avctx, AudioFrameQueue *afq)
Initialize AudioFrameQueue.
uint8_t * extradata
some codecs need / can use extradata like Huffman tables.
Definition: avcodec.h:1164
AVCodec ff_libspeex_encoder
Definition: libspeexenc.c:351
uint8_t * data
Definition: avcodec.h:973
int duration
Duration of this packet in AVStream->time_base units, 0 if unknown.
Definition: avcodec.h:991
int frames_per_packet
number of frames to encode in each packet
Definition: libspeexenc.c:100
#define AV_LOG_ERROR
Something went wrong and cannot losslessly be recovered.
Definition: log.h:123
static const AVCodecDefault defaults[]
Definition: libspeexenc.c:345
#define CODEC_CAP_DELAY
Encoder or decoder requires flushing with NULL input at the end in order to give the complete and cor...
Definition: avcodec.h:713
#define AVERROR(e)
Definition: error.h:43
sample_fmts
Definition: avconv_filter.c:68
#define NULL_IF_CONFIG_SMALL(x)
Return NULL if CONFIG_SMALL is true, otherwise the argument without modification. ...
Definition: internal.h:150
#define AV_LOG_DEBUG
Stuff which is only useful for libav* developers.
Definition: log.h:144
int flags
CODEC_FLAG_*.
Definition: avcodec.h:1144
#define CODEC_FLAG_QSCALE
Use fixed qscale.
Definition: avcodec.h:611
void av_log(void *avcl, int level, const char *fmt,...)
Definition: log.c:168
const char * name
Name of the codec implementation.
Definition: avcodec.h:2803
#define AE
Definition: libspeexenc.c:328
int ff_af_queue_add(AudioFrameQueue *afq, const AVFrame *f)
Add a frame to the queue.
#define FF_INPUT_BUFFER_PADDING_SIZE
Required number of additionally allocated bytes at the end of the input bitstream for decoding...
Definition: avcodec.h:531
int bit_rate
the average bitrate
Definition: avcodec.h:1114
audio channel layout utility functions
int32_t
int ff_alloc_packet(AVPacket *avpkt, int size)
Check AVPacket size and/or allocate data.
Definition: utils.c:1245
LIBAVUTIL_VERSION_INT
Definition: eval.c:55
if(ac->has_optimized_func)
int frame_size
Number of samples per channel in an audio frame.
Definition: avcodec.h:1811
NULL
Definition: eval.c:55
void * enc_state
libspeex encoder state
Definition: libspeexenc.c:99
Libavcodec external API header.
AVSampleFormat
Audio Sample Formats.
Definition: samplefmt.h:61
#define OFFSET(x)
Definition: libspeexenc.c:327
int compression_level
Definition: avcodec.h:1136
AV_SAMPLE_FMT_NONE
Definition: avconv_filter.c:68
int sample_rate
samples per second
Definition: avcodec.h:1791
av_default_item_name
Definition: dnxhdenc.c:52
main external API structure.
Definition: avcodec.h:1050
static void close(AVCodecParserContext *s)
Definition: h264_parser.c:490
int extradata_size
Definition: avcodec.h:1165
Describe the class of an AVClass context structure.
Definition: log.h:33
int global_quality
Global quality for codecs which cannot change it per frame.
Definition: avcodec.h:1130
uint8_t * data[AV_NUM_DATA_POINTERS]
pointer to the picture/channel planes.
Definition: frame.h:141
common internal api header.
common internal and external API header
static av_cold void print_enc_params(AVCodecContext *avctx, LibSpeexEncContext *s)
Definition: libspeexenc.c:110
signed 16 bits
Definition: samplefmt.h:64
SpeexHeader header
libspeex header struct
Definition: libspeexenc.c:98
static const AVOption options[]
Definition: libspeexenc.c:329
static av_cold int init(AVCodecParserContext *s)
Definition: h264_parser.c:499
void * priv_data
Definition: avcodec.h:1092
void ff_af_queue_remove(AudioFrameQueue *afq, int nb_samples, int64_t *pts, int *duration)
Remove frame(s) from the queue.
int channels
number of audio channels
Definition: avcodec.h:1792
int abr
flag to enable ABR
Definition: libspeexenc.c:103
#define FF_QP2LAMBDA
factor to convert from H.263 QP to lambda
Definition: avutil.h:207
void ff_af_queue_close(AudioFrameQueue *afq)
Close AudioFrameQueue.
float vbr_quality
VBR quality 0.0 to 10.0.
Definition: libspeexenc.c:101
#define AV_CH_LAYOUT_MONO
This structure stores compressed data.
Definition: avcodec.h:950
int delay
Codec delay.
Definition: avcodec.h:1212
int64_t pts
Presentation timestamp in AVStream->time_base units; the time at which the decompressed packet will b...
Definition: avcodec.h:966