ref: baaedc4234f91e113c759596e2ea6f7b92dfdb50
parent: 2b5cc07f8b23ede7ba9b240155a7869f491d192f
author: robs <robs>
date: Thu Sep 25 09:18:55 EDT 2008
--bits, --encoding, doc clean-up
--- a/ChangeLog
+++ b/ChangeLog
@@ -21,10 +21,10 @@
ated in [F(ormat)] [E(ffect)] Replacement due after
------- ---------------------- ---------------------- -------
14.1.0 F flac: libFLAC 1.1.1 libFLAC > 1.1.1 2009-01-29
+ 14.1.0 E resample ~= rate 2009-01-29
+ 14.1.0 E polyphase ~= rate 2009-01-29
+ 14.1.0 E rabbit ~= rate 2009-01-29
14.1.0 F wve (native) wve (libsndfile) 2009-07-29
- 14.1.0 E resample ~= rate 2009-07-29
- 14.1.0 E polyphase ~= rate 2009-07-29
- 14.1.0 E rabbit ~= rate 2009-07-29
14.1.0 Behaviour whereby soxi 2009-07-29
sox -V file(s) -n
doesn't read to EOF.
@@ -34,7 +34,7 @@
Deprec- Feature [O(ption)] Removal
ated in [F(ormat)] [E(ffect)] Replacement due after
------- ---------------------- ---------------------- -------
- 14.1.1 E key pitch 14.1.1 + 6 months
+ 14.1.1 E key renamed to pitch 14.1.1 + 6 months
14.1.1 E pan ~= mixer/remix 14.1.1 + 6 months
File formats:
@@ -44,14 +44,15 @@
Effects:
+ o N.B. Changed to better default settings for `rate' effect;
+ add -s -L options to be compatible with SoX v14.1.0. (robs)
+ o New options for `rate' effect to configure phase response,
+ band-width and aliasing. (robs)
o New `riaa' effect: RIAA vinyl playback EQ. (robs)
o New `loudness' effect: gain control with ISO 226 loudness
compensation. (robs)
o New -b option for the norm effect; can be used to fix stereo
imbalance. (robs)
- o Change default settings for `rate' effect; add -s -L options to
- be compatible with SoX v14.1.0; phase response, band-width and
- aliasing now configurable; see man page for details. (robs)
o Fix broken audio pass-through with noiseprof effect. (robs)
o Improved documentation for the `stat' effect. (robs)
o New --effects-file option to read effects and arguments from
@@ -65,6 +66,8 @@
o Display (with -V) the detected file-type if it differs from the
file extension. (robs)
o New -t option for soxi; to display the detected file type. (robs)
+ o New -b/--bits, -e/--encoding alternative options for specifying
+ audio encoding parameters. (robs)
Other bug fixes:
--- a/sox.1
+++ b/sox.1
@@ -81,13 +81,13 @@
.EE
translates an audio file in Sun AU format to a Microsoft WAV file, whilst
.EX
- sox recital.au -r 12k -1 -c 1 recital.wav vol 0.7 dither
+ sox recital.au -r 12k -b 8 -c 1 recital.wav vol 0.7 dither
.EE
performs the same format translation, but also changes the audio
sampling rate & sample size, down-mixes to mono, and applies
the \fBvol\fR and \fBdither\fR effects.
.EX
- sox -r 8k -u -1 -c 1 voice-memo.raw voice-memo.wav
+ sox -r 8k -u -b 8 -c 1 voice-memo.raw voice-memo.wav
.EE
converts `raw' (a.k.a. `headerless') audio to a self-descibing file format,
.EX
@@ -141,11 +141,11 @@
typically use 96 or 192 kHz.
.TP
sample size
-The number of bits used to store each sample. The most popular is 16-bit
-(two bytes); 8-bit (one byte) was popular in the early days of computer
-audio, and is still used in telephony; 24-bit (three bytes) is used,
-primarily as an intermediate format, in the professional audio arena. Other
-sizes are also used.
+The number of bits used to store each sample. The most popular is
+16-bit (two bytes); 8-bit (one byte) was popular in the early days of
+computer audio, and is still used in telephony; 24-bit (three bytes) is
+used, primarily as an intermediate format, in the professional audio
+arena. Other sizes are also used.
.TP
data encoding
The way in which each audio sample is represented (or `encoded'). Some
@@ -153,8 +153,8 @@
some `compress' the audio data, i.e. the stored audio data takes up less
space (i.e. disk-space or transmission band-width) than the other format
parameters and the number of samples would imply. Commonly-used
-encoding types include floating-point, \(*m-law, ADPCM, signed linear,
-and FLAC.
+encoding types include floating-point, \(*m-law, ADPCM, signed-integer
+PCM, and FLAC.
.TP
channels
The number of audio channels contained in the file. One (`mono') and two
@@ -755,7 +755,18 @@
working with headerless file formats or when specifying a format
for the output file that is different to that of the input file.
.TP
-\fB\-c\fR, \fB\-\-channels\fR \fICHANNELS\fR
+\fB\-b\fR \fIBITS\fR, \fB\-\-bits\fR \fIBITS\fR
+The number of bits in each encoded sample.
+Not applicable to complex encodings, e.g. MP3, FLAC.
+Not necessary with encodings that have a fixed number of bits, e.g.
+A/\(*m-law, ADPCM.
+.TP
+\fB\-1\fR\^/\fB\-2\fR\^/\fB\-3\fR\^/\fB\-4\fR\^/\fB\-8\fR
+The number of bytes in each encoded sample. Aliases for
+\fB\-b 8\fR/\fB\-b 16\fR/\fB\-b 24\fR/\fB\-b 32\fR/\fB\-b 64\fR
+respectively.
+.TP
+\fB\-c\fR \fICHANNELS\fR, \fB\-\-channels\fR \fICHANNELS\fR
The number of audio channels in the audio file; this can be any number
greater than zero. To cause the output file to have a different number of
channels than the input file, include this option with the output file
@@ -775,6 +786,68 @@
.B mixer
effect is necessary.
.TP
+\fB\-e \fIENCODING\fR, \fB\-\-encoding\fR \fIENCODING\fR
+The audio encoding type.
+.RS
+.IP \fBsigned-integer\fR
+PCM data stored as signed (`two's complement') integers. Commonly used
+with a 16 or 24-bit encoding size.
+A value of 0 represents minimum signal power.
+.IP \fBunsigned-integer\fR
+PCM data stored as unsigned integers. Commonly used
+with an 8-bit encoding size. A value of 0 represents maximum signal
+power.
+.IP \fBfloating-point\fR
+PCM data stored as IEEE 754 single precision (32-bit) or double
+precision (64-bit) floating-point ('real') numbers.
+A value of 0 represents minimum signal power.
+.IP \fBa-law\fR
+International telephony standard for logarithmic encoding to 8 bits per
+sample. It has a precision equivalent to roughly 13-bit PCM and is
+sometimes encoded with reversed bit-ordering (see the
+.B \-X
+option).
+.IP \fBu-law,\ mu-law\fR
+North American telephony standard for logarithmic encoding to 8 bits per
+sample. A.k.a. \(*m-law. It has a precision equivalent to roughly
+14-bit PCM and is
+sometimes encoded with reversed bit-ordering (see the
+.B \-X
+option).
+.IP \fBoki-adpcm\fR
+OKI (a.k.a. VOX, Dialogic, or Intel) 4-bit ADPCM;
+it has a precision equivalent to roughly 12-bit PCM.
+ADPCM is a form of audio compression that has a good
+compromise between audio quality and encoding/decoding speed.
+.IP \fBima-adpcm\fR
+IMA (a.k.a. DVI) 4-bit ADPCM;
+it has a precision equivalent to roughly 13-bit PCM.
+.IP \fBms-adpcm\fR
+Microsoft 4-bit ADPCM; it has a precision equivalent to roughly 14-bit
+PCM.
+.IP \fBgsm-full-rate\fR
+GSM is currently used for the vast majority of the world's digital
+wireless telephone calls. It utilises several audio
+formats with different bit-rates and associated speech quality.
+SoX has support for GSM's original 13kbps `Full Rate' audio format.
+It is usually CPU intensive to work with GSM audio.
+.RE
+.TP
+\
+Encoding names can be abbreviated where this would not be ambiguous;
+e.g. 'unsigned-integer' can be given as 'un', but not 'u' (ambiguous
+with 'u-law'). For reasons of forward compatibility, using
+abbreviations in scripts is not recommended.
+.SP
+Note that explicitly specifying other encoding types (e.g. MP3, FLAC)
+is not necessary since they can be inferred from the file type or
+header.
+.TP
+\fB\-s\fR\^/\fB\-u\fR\^/\fB\-f\fR\^/\fB\-A\fR\^/\fB\-U\fR\^/\fB\-o\fR\^/\fB\-i\fR\^/\fB\-a\fR\^/\fB\-g\fR
+Aliases for specifying the encoding types
+\fBsigned-integer\fR/\fBunsigned-integer\fR/\fBfloating-point\fR/\fBa-law\fR/\fBmu-law\fR/\fBoki-adpcm\fR/\fBima-adpcm\fR/\fBms-adpcm\fR/\fBgsm-full-rate\fR
+respectively.
+.TP
\fB\-r, \fB\-\-rate\fR \fIRATE\fR[\fBk\fR]
Gives the sample rate in Hz (or kHz if appended with `k') of the file.
To cause the output file to have
@@ -855,44 +928,6 @@
See also N.B. in section on
.B \-x
above.
-.TP
-\fB\-s\fR\^/\fB\-u\fR\^/\fB\-U\fR\^/\fB\-A\fR\^/\fB\-a\fR\^/\fB\-i\fR\^/\fB\-g\fR\^/\fB\-f\fR
-The audio data encoding is signed linear (2's complement),
-unsigned linear, \(*m-law (logarithmic), A-law (logarithmic),
-ADPCM, IMA-ADPCM, GSM, or floating-point.
-.SP
-\(*m-law (or mu-law) and A-law are the U.S. and
-international standards for logarithmic telephone audio compression.
-When uncompressed \(*m-law has roughly the precision of 14-bit PCM audio
-and A-law has roughly the precision of 13-bit PCM audio.
-.SP
-A-law and \(*m-law are sometimes encoded using reversed bit-ordering
-(i.e. MSB becomes LSB). If you need this support then you can use the
-.B \-X
-option or the pseudo
-file types of `.la' and `.lu' to inform SoX of the encoding. See
-supported file types for more information.
-.SP
-ADPCM is a form of audio compression that has a good
-compromise between good audio quality and fast encoding/decoding
-time. It is used for telephone audio compression and places were
-full fidelity is not as important. When uncompressed it has roughly
-the precision of 16-bit PCM audio. Popular version of ADPCM include
-G.726, MS ADPCM, and IMA ADPCM. The \fB\-a\fR flag has different meanings
-in different file handlers. In \fB.wav\fR files it represents MS ADPCM
-files, in all others it means G.726 ADPCM.
-IMA ADPCM is a specific form of ADPCM compression, slightly simpler
-and slightly lower fidelity than Microsoft's flavor of ADPCM.
-IMA ADPCM is also called DVI ADPCM.
-.SP
-GSM is currently used for the vast majority of the world's digital
-wireless telephone calls. It utilises several audio
-formats with different bit-rates and associated speech quality.
-SoX has support for GSM's original 13kbps `Full Rate' audio format.
-It is usually CPU intensive to work with GSM audio.
-.TP
-\fB\-1\fR\^/\fB\-2\fR\^/\fB\-3\fR\^/\fB\-4\fR\^/\fB\-8\fR
-The sample datum size is 1, 2, 3, 4, or 8 bytes; i.e. 8, 16, 24, 32, or 64 bits.
.SS Output File Format Options
These options apply only to the output file and may precede only the output
filename on the command line.
@@ -1296,6 +1331,7 @@
Delay one or more audio channels.
.I length
can specify a time or, if appended with an `s', a number of samples.
+Do not specify both time and samples delays in the same command.
For example,
.B delay 1\*d5 0 0\*d5
delays the first channel by 1\*d5 seconds, the third channel by 0\*d5
@@ -1506,10 +1542,13 @@
.DT
.TP
\fBgain \fIdB-gain\fR
-Apply an amplification or an attenuation to the audio signal.
-This is an alias for the
+Apply an amplification or an attenuation to the audio signal. The
+signal level is adjusted by the given number of dB\*mpositive amplifies
+(beware of Clipping), negative attenuates.
+.SP
+See also the
.B vol
-effect\*mhandy for those who prefer to work in dBs by default.
+effect.
.TP
\fBhighpass\fR\^|\^\fBlowpass\fR [\fB\-1\fR|\fB\-2\fR] \fIfrequency\fR[\fBk\fR]\fR [\fRwidth\fR[\fBq\fR\^|\^\fBo\fR\^|\^\fBh\fR\^|\^\fBk\fR]]
Apply a high-pass or low-pass filter with 3dB point \fIfrequency\fR.
@@ -1979,16 +2018,16 @@
.SP
Examples:
.EX
- sox input.wav -2 output.wav rate -s -a 44100 dither
+ sox input.wav -b 16 output.wav rate -s -a 44100 dither
.EE
default (high) quality resampling; overrides: steep filter, allow
-aliasing; to 44\*d1kHz sample rate; dither output to 2-byte (16-bit) WAV
+aliasing; to 44\*d1kHz sample rate; dither output to 16-bit WAV
file.
.EX
- sox input.wav -3 output.aiff rate -v -L -b 90 48k
+ sox input.wav -b 24 output.aiff rate -v -L -b 90 48k
.EE
very high quality resampling; overrides: linear phase, band-width 90%;
-to 48k sample rate; store output to 3-byte (24-bit) AIFF file.
+to 48k sample rate; store output to 24-bit AIFF file.
.TS
center;
c8 c8 c.
@@ -2884,17 +2923,17 @@
effect.
.TP
\fBpan \fIdirection\fR
-Pan the audio from one channel to another. This is done by
-changing the volume of the input channels so that it fades out on one
-channel and fades-in on another. If the number of input channels is
-different then the number of output channels then this effect tries to
-intelligently handle this. For instance, if the input contains 1 channel
-and the output contains 2 channels, then it will create the missing channel
-itself. The
+Mix the audio from one channel to another.
+Use
+.B mixer
+or
+.B remix
+instead of this effect.
+.SP
+The
.I direction
is a value from \-1 to 1. \-1 represents
-far left and 1 represents far right. Numbers in between will start the
-pan effect without totally muting the opposite channel.
+far left and 1 represents far right.
.TP
\fBpolyphase\fR [\fB\-w nut\fR\^|\^\fBham\fR] [\fB\-width \fIn\fR] [\fB\-cut-off \fIc\fR]
Change the sampling rate using `polyphase interpolation', a DSP algorithm.
--- a/src/sox.c
+++ b/src/sox.c
@@ -1393,58 +1393,59 @@
{
size_t i;
static char const * lines[] = {
-"SPECIAL FILENAMES:",
-"- stdin (infile) or stdout (outfile)",
-"-d use the default audio device (where available)",
-"-n use the null file handler; e.g. with synth or stat effect",
+"SPECIAL FILENAMES (infile, outfile):",
+"- Pipe/redirect input/output (stdin/stdout); use with -t",
+"-d Use the default audio device (where available)",
+"-n Use the `null' file handler; e.g. with synth effect",
+"http://server/file Use the given URL as input file (where supported)",
"",
"GLOBAL OPTIONS (gopts) (can be specified at any point before the first effect):",
-"--buffer BYTES set the size of all processing buffers (default 8192)",
-"--combine concatenate concatenate multiple input files (default for sox, rec)",
-"--combine sequence sequence multiple input files (default for play)",
-"--effects-file FILENAME file containing effects and options",
-"-h, --help display version number and usage information",
-"--help-effect NAME display usage of specified effect; use `all' to display all",
-"--help-format NAME display info on specified format; use `all' to display all",
-"--input-buffer BYTES override the input buffer size (default: as --buffer)",
-"--interactive prompt to overwrite output file",
-"-m, --combine mix mix multiple input files (instead of concatenating)",
-"-M, --combine merge merge multiple input files (instead of concatenating)",
-"--output single write to single output file (default)",
-"--output multiple write to multiple output file",
-"--plot gnuplot|octave generate script to plot response of filter effect",
-"-q, --no-show-progress run in quiet mode; opposite of -S",
-"--replay-gain track|album|off default: off (sox, rec), track (play)",
-"-R use default random numbers (same on each run of SoX)",
-"-S, --show-progress display progress while processing audio data",
-"--version display version number of SoX and exit",
-"-V[LEVEL] increment or set verbosity level (default 2); levels are:",
-" 1: failure messages",
-" 2: warnings",
-" 3: details of processing",
-" 4-6: increasing levels of debug messages",
-"",
+"--buffer BYTES Set the size of all processing buffers (default 8192)",
+"--combine concatenate Concatenate multiple input files (default for sox, rec)",
+"--combine sequence Sequence multiple input files (default for play)",
+"--effects-file FILENAME File containing effects and options",
+"-h, --help Display version number and usage information",
+"--help-effect NAME Show usage of effect NAME, or NAME=all for all",
+"--help-format NAME Show info on format NAME, or NAME=all for all",
+"--input-buffer BYTES Override the input buffer size (default: as --buffer)",
+"--interactive Prompt to overwrite output file",
+"-m, --combine mix Mix multiple input files (instead of concatenating)",
+"-M, --combine merge Merge multiple input files (instead of concatenating)",
+"--output single Write to single output file (default)",
+"--output multiple Write to multiple output file",
+"--plot gnuplot|octave Generate script to plot response of filter effect",
+"-q, --no-show-progress Run in quiet mode; opposite of -S",
+"--replay-gain track|album|off Default: off (sox, rec), track (play)",
+"-R Use default random numbers (same on each run of SoX)",
+"-S, --show-progress Display progress while processing audio data",
+"--version Display version number of SoX and exit",
+"-V[LEVEL] Increment or set verbosity level (default 2); levels:",
+" 1: failure messages",
+" 2: warnings",
+" 3: details of processing",
+" 4-6: increasing levels of debug messages",
"FORMAT OPTIONS (fopts):",
"Input file format options need only be supplied for files that are headerless.",
"Output files will have the same format as the input file where possible and not",
"overriden by any of various means including providing output format options.",
"",
-"-c, --channels CHANNELS number of channels of audio data; e.g. 2 = stereo",
-"-C, --compression FACTOR compression factor for output format",
-"--add-comment TEXT Append output file comment",
-"--comment TEXT Specify comment text for the output file",
-"--comment-file FILENAME file containing comment text for the output file",
-"--endian little|big|swap set endianness; swap means opposite to default",
-"-r, --rate RATE sample rate of audio",
-"-t, --type FILETYPE file type of audio",
-"-x invert auto-detected endianness",
-"-N, --reverse-nibbles nibble-order",
-"-X, --reverse-bits bit-order of data",
-"-B/-L force endianness to big/little",
-"-s/-u/-U/-A/ sample encoding: signed/unsigned/u-law/A-law",
-" -a/-i/-g/-f ADPCM/IMA ADPCM/GSM/floating point",
-"-1/-2/-3/-4/-8 sample size in bytes",
-"-v, --volume FACTOR input file volume adjustment factor (real number)",
+"-v|--volume FACTOR Input file volume adjustment factor (real number)",
+"-t|--type FILETYPE File type of audio",
+"-s/-u/-f/-U/-A/-i/-a/-g Encoding type=signed-integer/unsigned-integer/floating-",
+" point/mu-law/a-law/ima-adpcm/ms-adpcm/gsm-full-rate",
+"-e|--encoding ENCODING Set encoding (ENCODING in above list)",
+"-b|--bits BITS Encoded sample size in bits",
+"-1/-2/-3/-4/-8 Encoded sample size in bytes",
+"-N|--reverse-nibbles Encoded nibble-order",
+"-X|--reverse-bits Encoded bit-order",
+"--endian little|big|swap Encoded byte-order; swap means opposite to default",
+"-L/-B/-x Short options for the above",
+"-c|--channels CHANNELS Number of channels of audio data; e.g. 2 = stereo",
+"-r|--rate RATE Sample rate of audio",
+"-C|--compression FACTOR Compression factor for output format",
+"--add-comment TEXT Append output file comment",
+"--comment TEXT Specify comment text for the output file",
+"--comment-file FILENAME File containing comment text for the output file",
""};
display_SoX_version(stdout);
@@ -1459,7 +1460,7 @@
puts(lines[i]);
display_supported_formats();
display_supported_effects();
- printf("effopts: effect dependent; see --help-effect\n");
+ printf("EFFECT OPTIONS (effopts): effect dependent; see --help-effect\n");
exit(message != NULL);
}
@@ -1593,7 +1594,7 @@
free(text);
}
-static char *getoptstr = "+ac:dfghimnoqr:st:uv:xABC:LMNRSTUV::X12348";
+static char *getoptstr = "+ab:c:de:fghimnoqr:st:uv:xABC:LMNRSTUV::X12348";
static struct option long_options[] =
{
@@ -1613,8 +1614,10 @@
{"output" , required_argument, NULL, 0},
{"effects-file" , required_argument, NULL, 0},
+ {"bits" , required_argument, NULL, 'b'},
{"channels" , required_argument, NULL, 'c'},
{"compression" , required_argument, NULL, 'C'},
+ {"encoding" , required_argument, NULL, 'e'},
{"help" , no_argument, NULL, 'h'},
{"no-show-progress", no_argument, NULL, 'q'},
{"rate" , required_argument, NULL, 'r'},
@@ -1627,6 +1630,15 @@
{NULL, 0, NULL, 0}
};
+static int opt_index(int val) /* Find the long_options[] slot whose `val' (short-option code) equals val. */
+{
+ int i;
+ for (i = 0; long_options[i].name; ++i) /* scan until the entry with a NULL name that closes the table */
+ if (long_options[i].val == val)
+ return i; /* first match wins */
+ return -1; /* no entry carries this code; callers must not index long_options[] with it */
+}
+
static enum_item const combine_methods[] = {
ENUM_ITEM(sox_,sequence)
ENUM_ITEM(sox_,concatenate)
@@ -1655,6 +1667,24 @@
ENUM_ITEM(sox_plot_,gnuplot)
{0, 0}};
+enum { /* Local ids for the encoding names accepted by -e/--encoding; translated to SOX_ENCODING_* in the option switch. */
+ encoding_signed_integer, encoding_unsigned_integer, encoding_floating_point,
+ encoding_ms_adpcm, encoding_ima_adpcm, encoding_oki_adpcm,
+ encoding_gsm_full_rate, encoding_u_law, encoding_a_law};
+
+static enum_item const encodings[] = { /* Name -> id table passed to enum_option() for the -e/--encoding argument. */
+ {"signed-integer", encoding_signed_integer},
+ {"unsigned-integer", encoding_unsigned_integer},
+ {"floating-point", encoding_floating_point},
+ {"ms-adpcm", encoding_ms_adpcm},
+ {"ima-adpcm", encoding_ima_adpcm},
+ {"oki-adpcm", encoding_oki_adpcm},
+ {"gsm-full-rate", encoding_gsm_full_rate},
+ {"u-law", encoding_u_law},
+ {"mu-law", encoding_u_law}, /* second spelling; deliberately shares the id of "u-law" */
+ {"a-law", encoding_a_law},
+ {0, 0}}; /* all-zero last entry; presumably the end marker find_enum_text() stops at -- confirm */
+
static int enum_option(int option_index, enum_item const * items)
{
enum_item const * p = find_enum_text(optarg, items);
@@ -1826,6 +1856,33 @@
if (sscanf(optarg, "%lf %c", &f->encoding.compression, &dummy) != 1) {
sox_fail("Compression value `%s' is not a number", optarg);
exit(1);
+ }
+ break;
+
+ case 'b':
+ if (sscanf(optarg, "%i %c", &i, &dummy) != 1 || i <= 0) {
+ sox_fail("Bits value `%s' is not a positive integer", optarg);
+ exit(1);
+ }
+ f->encoding.bits_per_sample = i;
+ break;
+
+ case 'e': switch (enum_option(opt_index('e'), encodings)) {
+ case encoding_signed_integer: f->encoding.encoding = SOX_ENCODING_SIGN2; break;
+ case encoding_unsigned_integer: f->encoding.encoding = SOX_ENCODING_UNSIGNED; break;
+ case encoding_floating_point: f->encoding.encoding = SOX_ENCODING_FLOAT; break;
+ case encoding_ms_adpcm: f->encoding.encoding = SOX_ENCODING_MS_ADPCM; break;
+ case encoding_ima_adpcm: f->encoding.encoding = SOX_ENCODING_IMA_ADPCM; break;
+ case encoding_oki_adpcm: f->encoding.encoding = SOX_ENCODING_OKI_ADPCM; break;
+ case encoding_gsm_full_rate: f->encoding.encoding = SOX_ENCODING_GSM; break;
+ case encoding_u_law: f->encoding.encoding = SOX_ENCODING_ULAW;
+ if (f->encoding.bits_per_sample == 0)
+ f->encoding.bits_per_sample = 8;
+ break;
+ case encoding_a_law: f->encoding.encoding = SOX_ENCODING_ALAW;
+ if (f->encoding.bits_per_sample == 0)
+ f->encoding.bits_per_sample = 8;
+ break;
}
break;