001/*
002 * Licensed to the Apache Software Foundation (ASF) under one or more
003 * contributor license agreements.  See the NOTICE file distributed with
004 * this work for additional information regarding copyright ownership.
005 * The ASF licenses this file to You under the Apache License, Version 2.0
006 * (the "License"); you may not use this file except in compliance with
007 * the License.  You may obtain a copy of the License at
008 *
009 *      https://www.apache.org/licenses/LICENSE-2.0
010 *
011 * Unless required by applicable law or agreed to in writing, software
012 * distributed under the License is distributed on an "AS IS" BASIS,
013 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
014 * See the License for the specific language governing permissions and
015 * limitations under the License.
016 */
017package org.apache.commons.io;
018
019import java.nio.charset.Charset;
020import java.nio.charset.StandardCharsets;
021import java.nio.charset.UnsupportedCharsetException;
022import java.util.Collections;
023import java.util.SortedMap;
024import java.util.TreeMap;
025
/**
 * Charsets required of every implementation of the Java platform.
 *
 * From the Java documentation <a href="https://docs.oracle.com/javase/8/docs/api/java/nio/charset/Charset.html">
 * Standard charsets</a>:
 * <p>
 * <cite>Every implementation of the Java platform is required to support the following character encodings. Consult
 * the release documentation for your implementation to see if any other encodings are supported.</cite>
 * </p>
 *
 * <table>
 * <caption>Standard Charsets</caption>
 * <tr>
 * <th>Charset</th>
 * <th>Description</th>
 * </tr>
 * <tr>
 * <td style="white-space: nowrap">{@code US-ASCII}</td>
 * <td>Seven-bit ASCII, a.k.a. ISO646-US, a.k.a. the Basic Latin block of the Unicode character set.</td>
 * </tr>
 * <tr>
 * <td style="white-space: nowrap">{@code ISO-8859-1}</td>
 * <td>ISO Latin Alphabet No. 1, a.k.a. ISO-LATIN-1.</td>
 * </tr>
 * <tr>
 * <td style="white-space: nowrap">{@code UTF-8}</td>
 * <td>Eight-bit Unicode Transformation Format.</td>
 * </tr>
 * <tr>
 * <td style="white-space: nowrap">{@code UTF-16BE}</td>
 * <td>Sixteen-bit Unicode Transformation Format, big-endian byte order.</td>
 * </tr>
 * <tr>
 * <td style="white-space: nowrap">{@code UTF-16LE}</td>
 * <td>Sixteen-bit Unicode Transformation Format, little-endian byte order.</td>
 * </tr>
 * <tr>
 * <td style="white-space: nowrap">{@code UTF-16}</td>
 * <td>Sixteen-bit Unicode Transformation Format, byte order specified by a mandatory initial byte-order mark (either order
 * accepted on input, big-endian used on output.)</td>
 * </tr>
 * </table>
 *
 * @see <a href="https://docs.oracle.com/javase/8/docs/api/java/nio/charset/Charset.html">Standard charsets</a>
 * @since 2.3
 */
public class Charsets {

    //
    // This class should only contain Charset instances for required encodings. This guarantees that it will load
    // correctly and without delay on all Java platforms.
    //

    /**
     * Unmodifiable, case-insensitively keyed view of the six required charsets, built once at class load.
     */
    private static final SortedMap<String, Charset> STANDARD_CHARSET_MAP;

    static {
        // Keys are the canonical charset names; the comparator makes lookups such as "utf-8" succeed.
        final SortedMap<String, Charset> standardCharsetMap = new TreeMap<>(String.CASE_INSENSITIVE_ORDER);
        standardCharsetMap.put(StandardCharsets.ISO_8859_1.name(), StandardCharsets.ISO_8859_1);
        standardCharsetMap.put(StandardCharsets.US_ASCII.name(), StandardCharsets.US_ASCII);
        standardCharsetMap.put(StandardCharsets.UTF_16.name(), StandardCharsets.UTF_16);
        standardCharsetMap.put(StandardCharsets.UTF_16BE.name(), StandardCharsets.UTF_16BE);
        standardCharsetMap.put(StandardCharsets.UTF_16LE.name(), StandardCharsets.UTF_16LE);
        standardCharsetMap.put(StandardCharsets.UTF_8.name(), StandardCharsets.UTF_8);
        STANDARD_CHARSET_MAP = Collections.unmodifiableSortedMap(standardCharsetMap);
    }

    /**
     * <p>
     * ISO Latin Alphabet No. 1, also known as ISO-LATIN-1.
     * </p>
     * <p>
     * Every implementation of the Java platform is required to support this character encoding.
     * </p>
     *
     * @see <a href="https://docs.oracle.com/javase/8/docs/api/java/nio/charset/Charset.html">Standard charsets</a>
     * @deprecated Use {@link java.nio.charset.StandardCharsets#ISO_8859_1}.
     */
    @Deprecated
    public static final Charset ISO_8859_1 = StandardCharsets.ISO_8859_1;

    /**
     * <p>
     * Seven-bit ASCII, also known as ISO646-US, also known as the Basic Latin block of the Unicode character set.
     * </p>
     * <p>
     * Every implementation of the Java platform is required to support this character encoding.
     * </p>
     *
     * @see <a href="https://docs.oracle.com/javase/8/docs/api/java/nio/charset/Charset.html">Standard charsets</a>
     * @deprecated Use {@link java.nio.charset.StandardCharsets#US_ASCII}.
     */
    @Deprecated
    public static final Charset US_ASCII = StandardCharsets.US_ASCII;

    /**
     * <p>
     * Sixteen-bit Unicode Transformation Format, byte order specified by a mandatory initial byte-order mark
     * (either order accepted on input, big-endian used on output).
     * </p>
     * <p>
     * Every implementation of the Java platform is required to support this character encoding.
     * </p>
     *
     * @see <a href="https://docs.oracle.com/javase/8/docs/api/java/nio/charset/Charset.html">Standard charsets</a>
     * @deprecated Use {@link java.nio.charset.StandardCharsets#UTF_16}.
     */
    @Deprecated
    public static final Charset UTF_16 = StandardCharsets.UTF_16;

    /**
     * <p>
     * Sixteen-bit Unicode Transformation Format, big-endian byte order.
     * </p>
     * <p>
     * Every implementation of the Java platform is required to support this character encoding.
     * </p>
     *
     * @see <a href="https://docs.oracle.com/javase/8/docs/api/java/nio/charset/Charset.html">Standard charsets</a>
     * @deprecated Use {@link java.nio.charset.StandardCharsets#UTF_16BE}.
     */
    @Deprecated
    public static final Charset UTF_16BE = StandardCharsets.UTF_16BE;

    /**
     * <p>
     * Sixteen-bit Unicode Transformation Format, little-endian byte order.
     * </p>
     * <p>
     * Every implementation of the Java platform is required to support this character encoding.
     * </p>
     *
     * @see <a href="https://docs.oracle.com/javase/8/docs/api/java/nio/charset/Charset.html">Standard charsets</a>
     * @deprecated Use {@link java.nio.charset.StandardCharsets#UTF_16LE}.
     */
    @Deprecated
    public static final Charset UTF_16LE = StandardCharsets.UTF_16LE;

    /**
     * <p>
     * Eight-bit Unicode Transformation Format.
     * </p>
     * <p>
     * Every implementation of the Java platform is required to support this character encoding.
     * </p>
     *
     * @see <a href="https://docs.oracle.com/javase/8/docs/api/java/nio/charset/Charset.html">Standard charsets</a>
     * @deprecated Use {@link java.nio.charset.StandardCharsets#UTF_8}.
     */
    @Deprecated
    public static final Charset UTF_8 = StandardCharsets.UTF_8;

    /**
     * Tests whether the given name is the canonical name or an alias of the given non-null Charset, ignoring case.
     *
     * @param charset a non-null Charset.
     * @param charsetName The name to test, may be null.
     * @return {@code true} if {@code charsetName} is non-null and matches the canonical name or one of the aliases of
     *         {@code charset}, ignoring case; {@code false} otherwise.
     * @since 2.20.0
     */
    public static boolean isAlias(final Charset charset, final String charsetName) {
        return charsetName != null && (charset.name().equalsIgnoreCase(charsetName) || charset.aliases().stream().anyMatch(charsetName::equalsIgnoreCase));
    }

    /**
     * Tests whether a given encoding is UTF-8. If the given charset is null, then check the platform's default encoding.
     *
     * @param charset If the given charset is null, then check the platform's default encoding.
     * @return whether a given encoding is UTF-8.
     * @since 2.20.0
     */
    public static boolean isUTF8(final Charset charset) {
        return isUTF8Alias(toCharset(charset).name());
    }

    /**
     * Tests whether the given charset name is a UTF-8 name or alias.
     *
     * @param charsetName a charset name.
     * @return whether the given charset name is the canonical name or an alias of UTF-8.
     */
    private static boolean isUTF8Alias(final String charsetName) {
        return isAlias(StandardCharsets.UTF_8, charsetName);
    }

    /**
     * Returns a sorted map from canonical charset names to the charset objects required of every implementation of
     * the Java platform.
     * <p>
     * The required charsets are those listed under
     * <a href="https://docs.oracle.com/javase/8/docs/api/java/nio/charset/Charset.html">Standard charsets</a> in the
     * Java documentation. The same map instance is returned on every call.
     * </p>
     *
     * @return An immutable, case-insensitive map from canonical charset names to charset objects.
     * @see Charset#availableCharsets()
     * @since 2.5
     */
    public static SortedMap<String, Charset> requiredCharsets() {
        return STANDARD_CHARSET_MAP;
    }

    /**
     * Returns the given Charset or the {@link Charset#defaultCharset() default Charset} if the given Charset is null.
     *
     * @param charset
     *            A charset or null.
     * @return the given Charset or the default Charset if the given Charset is null.
     * @see Charset#defaultCharset()
     */
    public static Charset toCharset(final Charset charset) {
        return charset == null ? Charset.defaultCharset() : charset;
    }

    /**
     * Returns the given charset if non-null, otherwise return defaultCharset.
     *
     * @param charset The charset to test, may be null.
     * @param defaultCharset The charset to return if charset is null, may be null.
     * @return a Charset, or null if both arguments are null.
     * @since 2.12.0
     */
    public static Charset toCharset(final Charset charset, final Charset defaultCharset) {
        return charset == null ? defaultCharset : charset;
    }

    /**
     * Returns a Charset for the named charset. If the name is null, return the {@link Charset#defaultCharset() default Charset}.
     *
     * @param charsetName The name of the requested charset, may be null.
     * @return a Charset for the named charset.
     * @throws UnsupportedCharsetException If the named charset is unavailable (unchecked exception).
     * @throws java.nio.charset.IllegalCharsetNameException If the given name is not a legal charset name (unchecked exception).
     * @see Charset#defaultCharset()
     */
    public static Charset toCharset(final String charsetName) throws UnsupportedCharsetException {
        return toCharset(charsetName, Charset.defaultCharset());
    }

    /**
     * Returns a Charset for the named charset. If the name is null, return the given default Charset.
     *
     * @param charsetName The name of the requested charset, may be null.
     * @param defaultCharset The charset to return if charsetName is null, may be null.
     * @return a Charset for the named charset, or {@code defaultCharset} (possibly null) if the name is null.
     * @throws UnsupportedCharsetException If the named charset is unavailable (unchecked exception).
     * @throws java.nio.charset.IllegalCharsetNameException If the given name is not a legal charset name (unchecked exception).
     * @since 2.12.0
     */
    public static Charset toCharset(final String charsetName, final Charset defaultCharset) throws UnsupportedCharsetException {
        return charsetName == null ? defaultCharset : Charset.forName(charsetName);
    }

    /**
     * Returns a Charset for the named charset or the {@code defaultCharset}.
     * <p>
     * If {@code charsetName} is null or cannot load a charset, return {@code defaultCharset}. Therefore, this method should never fail and always return a
     * Charset.
     * </p>
     *
     * @param charsetName    The name of the requested charset, may be null.
     * @param defaultCharset The charset to return if charsetName is null or there is a problem, may be null which returns {@link Charset#defaultCharset()}.
     * @return a Charset for the named charset or {@code defaultCharset} if the name is null or any errors occur.
     * @see Charset#defaultCharset()
     * @since 2.20.0
     */
    public static Charset toCharsetDefault(final String charsetName, final Charset defaultCharset) {
        // Resolve the fallback once: the caller's default, or the platform default when that is null.
        final Charset fallback = toCharset(defaultCharset);
        if (charsetName == null) {
            // A null name must yield the caller's default, not unconditionally the platform default.
            return fallback;
        }
        try {
            return Charset.forName(charsetName);
        } catch (final RuntimeException ignored) {
            // Deliberately broad: illegal or unsupported names (or any other lookup failure) fall back to the default.
            return fallback;
        }
    }

    /**
     * Construct a new instance.
     *
     * @deprecated Will be private in 3.0.
     */
    @Deprecated
    public Charsets() {
        // empty
    }
}