001/*
002 * $HeadURL: http://juliusdavies.ca/svn/not-yet-commons-ssl/tags/commons-ssl-0.3.9/src/java/org/apache/commons/ssl/Base64.java $
003 * $Revision: 121 $
004 * $Date: 2007-11-13 21:26:57 -0800 (Tue, 13 Nov 2007) $
005 *
006 * ====================================================================
007 * Licensed to the Apache Software Foundation (ASF) under one
008 * or more contributor license agreements.  See the NOTICE file
009 * distributed with this work for additional information
010 * regarding copyright ownership.  The ASF licenses this file
011 * to you under the Apache License, Version 2.0 (the
012 * "License"); you may not use this file except in compliance
013 * with the License.  You may obtain a copy of the License at
014 *
015 *   http://www.apache.org/licenses/LICENSE-2.0
016 *
017 * Unless required by applicable law or agreed to in writing,
018 * software distributed under the License is distributed on an
019 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
020 * KIND, either express or implied.  See the License for the
021 * specific language governing permissions and limitations
022 * under the License.
023 * ====================================================================
024 *
025 * This software consists of voluntary contributions made by many
026 * individuals on behalf of the Apache Software Foundation.  For more
027 * information on the Apache Software Foundation, please see
028 * <http://www.apache.org/>.
029 *
030 */
031
032package org.apache.commons.ssl;
033
/**
 * Provides Base64 encoding and decoding as defined by RFC 2045.
 *
 * <p>This class implements section <cite>6.8. Base64 Content-Transfer-Encoding</cite>
 * from RFC 2045 <cite>Multipurpose Internet Mail Extensions (MIME) Part One:
 * Format of Internet Message Bodies</cite> by Freed and Borenstein.</p>
 *
 * <p>Decoding is lenient: bytes outside the Base64 alphabet (line breaks,
 * whitespace, anything else) are skipped, missing trailing padding is
 * tolerated, and the first <code>=</code> is treated as the end of the
 * data.</p>
 *
 * @author Apache Software Foundation
 * @version $Id: Base64.java 121 2007-11-14 05:26:57Z julius $
 * @see <a href="http://www.ietf.org/rfc/rfc2045.txt">RFC 2045</a>
 * @since 1.0-dev
 */
public class Base64 {

    /**
     * Chunk size per RFC 2045 section 6.8.
     *
     * <p>The character limit does not count the trailing CRLF, but counts
     * all other characters, including any equal signs.</p>
     *
     * @see <a href="http://www.ietf.org/rfc/rfc2045.txt">RFC 2045 section 6.8</a>
     */
    static final int CHUNK_SIZE = 76;

    /**
     * Chunk separator (CR LF) per RFC 2045 section 2.1.
     *
     * <p>Spelled out as byte values so that the separator does not depend on
     * the platform default charset.</p>
     *
     * @see <a href="http://www.ietf.org/rfc/rfc2045.txt">RFC 2045 section 2.1</a>
     */
    static final byte[] CHUNK_SEPARATOR = {(byte) '\r', (byte) '\n'};

    /** Size of the decoding table; only indices 0..127 are ever looked up. */
    static final int BASELENGTH = 255;

    /** Size of the encoding table (one entry per 6-bit value). */
    static final int LOOKUPLENGTH = 64;

    /** Number of bits in a byte. */
    static final int EIGHTBIT = 8;

    /** Number of bits in two bytes. */
    static final int SIXTEENBIT = 16;

    /** Number of bits in one unencoded group of three bytes. */
    static final int TWENTYFOURBITGROUP = 24;

    /** Number of characters in one encoded group. */
    static final int FOURBYTE = 4;

    /** Mask that isolates the sign bit of a byte. */
    static final int SIGN = -128;

    /** Byte used to pad output. */
    static final byte PAD = (byte) '=';

    /**
     * Maps a character code to its Base64 value (<code>0</code> through
     * <code>63</code>). For example, <code>base64Alphabet['+']</code> is
     * <code>62</code>. Entries for characters outside the alphabet,
     * including the pad character, are <code>-1</code>.
     */
    private static final byte[] base64Alphabet = new byte[BASELENGTH];

    /**
     * Maps a Base64 value (<code>0</code> through <code>63</code>) to its
     * character: <code>A</code>-<code>Z</code>, <code>a</code>-<code>z</code>,
     * <code>0</code>-<code>9</code>, <code>+</code> and <code>/</code>.
     * For example, <code>lookUpBase64Alphabet[62]</code> is <code>'+'</code>.
     */
    private static final byte[] lookUpBase64Alphabet = new byte[LOOKUPLENGTH];

    // Populate the encoding table, then derive the decoding table from it so
    // the two can never disagree.
    static {
        for (int i = 0; i < 26; i++) {
            lookUpBase64Alphabet[i] = (byte) ('A' + i);
            lookUpBase64Alphabet[26 + i] = (byte) ('a' + i);
        }
        for (int i = 0; i < 10; i++) {
            lookUpBase64Alphabet[52 + i] = (byte) ('0' + i);
        }
        lookUpBase64Alphabet[62] = (byte) '+';
        lookUpBase64Alphabet[63] = (byte) '/';

        for (int i = 0; i < BASELENGTH; i++) {
            base64Alphabet[i] = (byte) -1;
        }
        for (int value = 0; value < LOOKUPLENGTH; value++) {
            base64Alphabet[lookUpBase64Alphabet[value]] = (byte) value;
        }
    }

    /**
     * Returns whether or not the octet is in the base 64 alphabet.
     * The pad character <code>=</code> counts as a member.
     *
     * @param b The value to test
     * @return <code>true</code> if the value is the pad character or is
     *         defined in the base 64 alphabet, <code>false</code> otherwise.
     */
    public static boolean isBase64(byte b) {
        if (b == PAD) {
            return true;
        }
        // Negative bytes (0x80..0xFF) can never be alphabet members and must
        // not be used as an array index.
        return b >= 0 && base64Alphabet[b] >= 0;
    }

    /**
     * Tests a given byte array to see if it contains only valid characters
     * within the Base64 alphabet. Space, tab, CR and LF are ignored.
     *
     * @param arrayOctect byte array to test
     * @return <code>true</code> if all non-whitespace bytes are valid
     *         characters in the Base64 alphabet (or the pad character), or
     *         if there are no such bytes at all; <code>false</code> otherwise
     */
    public static boolean isArrayByteBase64(byte[] arrayOctect) {
        for (int i = 0; i < arrayOctect.length; i++) {
            byte b = arrayOctect[i];
            if (!isWhitespace(b) && !isBase64(b)) {
                return false;
            }
        }
        return true;
    }

    /**
     * Encodes binary data using the base64 algorithm but
     * does not chunk the output.
     *
     * @param binaryData binary data to encode
     * @return Base64 characters
     */
    public static byte[] encodeBase64(byte[] binaryData) {
        return encodeBase64(binaryData, false);
    }

    /**
     * Encodes binary data using the base64 algorithm and chunks
     * the encoded output into 76 character blocks
     *
     * @param binaryData binary data to encode
     * @return Base64 characters chunked in 76 character blocks
     */
    public static byte[] encodeBase64Chunked(byte[] binaryData) {
        return encodeBase64(binaryData, true);
    }

    /**
     * Decodes an Object using the base64 algorithm. Only
     * <code>byte[]</code> arguments are accepted.
     *
     * @param pObject Object to decode
     * @return An object (of type byte[]) containing the
     *         binary data which corresponds to the byte[] supplied.
     * @throws IllegalArgumentException if the parameter supplied is not
     *                                  of type byte[]
     */
    public Object decode(Object pObject) throws IllegalArgumentException {
        if (!(pObject instanceof byte[])) {
            throw new IllegalArgumentException("Parameter supplied to Base64 decode is not a byte[]");
        }
        return decode((byte[]) pObject);
    }

    /**
     * Decodes a byte[] containing characters in the Base64 alphabet.
     *
     * @param pArray A byte array containing Base64 character data
     * @return a byte array containing binary data
     */
    public byte[] decode(byte[] pArray) {
        return decodeBase64(pArray);
    }

    /**
     * Encodes binary data using the base64 algorithm, optionally
     * chunking the output into 76 character blocks.
     *
     * <p>When chunking, every line, including the last one, is terminated
     * by {@link #CHUNK_SEPARATOR}. Empty input produces empty output.</p>
     *
     * @param binaryData Array containing binary data to encode.
     * @param isChunked  if <code>true</code> this encoder will chunk
     *                   the base64 output into 76 character blocks
     * @return Base64-encoded data.
     * @throws IllegalArgumentException if the encoded form would not fit
     *                                  into a single byte array
     */
    public static byte[] encodeBase64(byte[] binaryData, boolean isChunked) {
        int fullTriplets = binaryData.length / 3;
        int leftoverBytes = binaryData.length % 3;

        // Sized in long arithmetic: counting bits or characters in an int
        // overflows for inputs of a few hundred megabytes.
        long encodedLength = (long) fullTriplets * FOURBYTE;
        if (leftoverBytes != 0) {
            encodedLength += FOURBYTE;
        }

        if (isChunked && CHUNK_SEPARATOR.length != 0) {
            // Integer ceiling division; floating point loses precision on
            // large lengths and can under-count the separators.
            long lineCount = (encodedLength + CHUNK_SIZE - 1) / CHUNK_SIZE;
            encodedLength += lineCount * CHUNK_SEPARATOR.length;
        }

        if (encodedLength > Integer.MAX_VALUE) {
            throw new IllegalArgumentException(
                "Input array too big, output array would be bigger than Integer.MAX_VALUE=" + Integer.MAX_VALUE);
        }

        byte[] encoded = new byte[(int) encodedLength];
        int in = 0;
        int out = 0;
        int lineLength = 0;

        for (int i = 0; i < fullTriplets; i++) {
            // Widen to unsigned ints so the shifts below need no sign fix-ups.
            int b1 = binaryData[in++] & 0xff;
            int b2 = binaryData[in++] & 0xff;
            int b3 = binaryData[in++] & 0xff;

            encoded[out++] = lookUpBase64Alphabet[b1 >>> 2];
            encoded[out++] = lookUpBase64Alphabet[((b1 & 0x03) << 4) | (b2 >>> 4)];
            encoded[out++] = lookUpBase64Alphabet[((b2 & 0x0f) << 2) | (b3 >>> 6)];
            encoded[out++] = lookUpBase64Alphabet[b3 & 0x3f];

            if (isChunked) {
                // CHUNK_SIZE is a multiple of 4, so a line fills up exactly
                // at a group boundary.
                lineLength += FOURBYTE;
                if (lineLength == CHUNK_SIZE) {
                    System.arraycopy(CHUNK_SEPARATOR, 0, encoded, out, CHUNK_SEPARATOR.length);
                    out += CHUNK_SEPARATOR.length;
                    lineLength = 0;
                }
            }
        }

        // Final partial group: pad to a full four characters.
        if (leftoverBytes == 1) {
            int b1 = binaryData[in] & 0xff;
            encoded[out++] = lookUpBase64Alphabet[b1 >>> 2];
            encoded[out++] = lookUpBase64Alphabet[(b1 & 0x03) << 4];
            encoded[out++] = PAD;
            encoded[out++] = PAD;
        } else if (leftoverBytes == 2) {
            int b1 = binaryData[in] & 0xff;
            int b2 = binaryData[in + 1] & 0xff;
            encoded[out++] = lookUpBase64Alphabet[b1 >>> 2];
            encoded[out++] = lookUpBase64Alphabet[((b1 & 0x03) << 4) | (b2 >>> 4)];
            encoded[out++] = lookUpBase64Alphabet[(b2 & 0x0f) << 2];
            encoded[out++] = PAD;
        }

        // A last line shorter than CHUNK_SIZE still gets its separator; the
        // space for it was reserved above.
        if (isChunked && out < encoded.length) {
            System.arraycopy(CHUNK_SEPARATOR, 0, encoded, out, CHUNK_SEPARATOR.length);
        }

        return encoded;
    }

    /**
     * Decodes Base64 data into octets.
     *
     * <p>Bytes outside the Base64 alphabet are ignored, as RFC 2045
     * requires. Decoding stops at the first pad character or at the end of
     * the input, whichever comes first; trailing bits that do not make up a
     * whole octet are dropped. Malformed input (missing or misplaced
     * padding) therefore never causes an exception.</p>
     *
     * @param base64Data Byte array containing Base64 data
     * @return Array containing decoded data; empty if the input holds no
     *         complete octet.
     */
    public static byte[] decodeBase64(byte[] base64Data) {
        // Four characters carry at most three octets, so this is an upper
        // bound on the output size.
        byte[] buffer = new byte[(int) (((long) base64Data.length * 3) / FOURBYTE)];
        int produced = 0;

        int bits = 0;      // pending, not yet emitted bits (right-aligned)
        int bitCount = 0;  // how many of them are valid: 0, 2, 4 or 6

        for (int i = 0; i < base64Data.length; i++) {
            byte c = base64Data[i];
            if (c == PAD) {
                // RFC 2045: '=' may be taken as evidence of end of data.
                break;
            }
            if (c < 0 || base64Alphabet[c] < 0) {
                // Not in the alphabet (whitespace, line breaks, noise).
                continue;
            }

            bits = (bits << 6) | base64Alphabet[c];
            bitCount += 6;

            if (bitCount >= EIGHTBIT) {
                bitCount -= EIGHTBIT;
                buffer[produced++] = (byte) (bits >> bitCount);
                bits &= (1 << bitCount) - 1;
            }
        }

        return produced == buffer.length ? buffer : copyPrefix(buffer, produced);
    }

    /**
     * Discards any whitespace (space, tab, CR, LF) from a base-64 encoded
     * block.
     *
     * @param data The base-64 encoded data to discard the whitespace
     *             from.
     * @return A new array holding the data, less whitespace (see RFC 2045).
     */
    static byte[] discardWhitespace(byte[] data) {
        byte[] kept = new byte[data.length];
        int count = 0;

        for (int i = 0; i < data.length; i++) {
            if (!isWhitespace(data[i])) {
                kept[count++] = data[i];
            }
        }

        return copyPrefix(kept, count);
    }

    /**
     * Discards any characters outside of the base64 alphabet, per
     * the requirements on page 25 of RFC 2045 - "Any characters
     * outside of the base64 alphabet are to be ignored in base64
     * encoded data." The pad character is kept.
     *
     * @param data The base-64 encoded data to groom
     * @return A new array holding the data, less non-base64 characters
     *         (see RFC 2045).
     */
    static byte[] discardNonBase64(byte[] data) {
        byte[] kept = new byte[data.length];
        int count = 0;

        for (int i = 0; i < data.length; i++) {
            if (isBase64(data[i])) {
                kept[count++] = data[i];
            }
        }

        return copyPrefix(kept, count);
    }

    /**
     * Encodes an Object using the base64 algorithm. Only
     * <code>byte[]</code> arguments are accepted.
     *
     * @param pObject Object to encode
     * @return An object (of type byte[]) containing the
     *         base64 encoded data which corresponds to the byte[] supplied.
     * @throws IllegalArgumentException if the parameter supplied is not
     *                                  of type byte[]
     */
    public Object encode(Object pObject) throws IllegalArgumentException {
        if (!(pObject instanceof byte[])) {
            throw new IllegalArgumentException("Parameter supplied to Base64 encode is not a byte[]");
        }
        return encode((byte[]) pObject);
    }

    /**
     * Encodes a byte[] containing binary data, into a byte[] containing
     * characters in the Base64 alphabet. The output is not chunked.
     *
     * @param pArray a byte array containing binary data
     * @return A byte array containing only Base64 character data
     */
    public byte[] encode(byte[] pArray) {
        return encodeBase64(pArray, false);
    }

    /** Tells whether the byte is one of space, tab, CR or LF. */
    private static boolean isWhitespace(byte b) {
        return b == (byte) ' ' || b == (byte) '\n' || b == (byte) '\r' || b == (byte) '\t';
    }

    /** Returns a new array holding the first <code>length</code> bytes of <code>source</code>. */
    private static byte[] copyPrefix(byte[] source, int length) {
        byte[] copy = new byte[length];
        System.arraycopy(source, 0, copy, 0, length);
        return copy;
    }

}