source: rtems/cpukit/libfs/src/dosfs/msdos_conv_utf8.c @ df40cc9

4.115
Last change on this file since df40cc9 was c499856, checked in by Chris Johns <chrisj@…>, on 03/20/14 at 21:10:47

Change all references of rtems.com to rtems.org.

  • Property mode set to 100644
File size: 7.3 KB
Line 
1/**
2 * @file
3 *
4 * @ingroup DOSFS
5 *
6 * @brief UTF-8 Converter
7 */
8
9/*
10 * Copyright (c) 2013 embedded brains GmbH.  All rights reserved.
11 *
12 *  embedded brains GmbH
13 *  Dornierstr. 4
14 *  82178 Puchheim
15 *  Germany
16 *  <rtems@embedded-brains.de>
17 *
18 * The license and distribution terms for this file may be
19 * found in the file LICENSE in this distribution or at
20 * http://www.rtems.org/license/LICENSE.
21 */
22
23#include <stddef.h>
24#include <assert.h>
25#include <errno.h>
26#include <iconv.h>
27#include <rtems/dosfs.h>
28#include <utf8proc/utf8proc.h>
29#include "msdos.h"
30
31#define INVALID_ICONV_DESC ( (iconv_t) -1 )
32
33typedef struct {
34  /*
35   * This structure must be the first field, since otherwise the cast
36   * operations later in the file are invalid.
37   */
38  rtems_dosfs_convert_control super;
39
40  iconv_t desc_codepage_to_utf8;
41  iconv_t desc_utf8_to_codepage;
42  iconv_t desc_utf16_to_utf8;
43  iconv_t desc_utf8_to_utf16;
44  uint8_t buffer[MSDOS_NAME_MAX_UTF8_LFN_BYTES];
45} msdos_utf8_convert_control;
46
/*
 * Runs a single iconv() conversion from src into dst.
 *
 * desc      iconv conversion descriptor to use.
 * src       Input bytes; src_size gives their count.
 * dst       Output buffer.
 * dst_size  In: capacity of dst in bytes.  Out: number of bytes iconv()
 *           stored in dst (updated on success and on failure).
 *
 * Returns 0 if iconv() reported a fully reversible conversion, ENOMEM if
 * iconv() reported an error, and EINVAL if iconv() reported one or more
 * non-reversible conversions.
 */
static int msdos_utf8_convert_with_iconv(
  iconv_t     desc,
  const void *src,
  size_t      src_size,
  void       *dst,
  size_t     *dst_size
)
{
  /* iconv() takes a non-const input pointer, so strip the qualifier here. */
  char   *in_cursor = (void *) (uintptr_t) src;
  char   *out_cursor = dst;
  size_t  in_remaining = src_size;
  size_t  out_remaining = *dst_size;
  size_t  irreversible_count;

  irreversible_count = iconv(
    desc,
    &in_cursor,
    &in_remaining,
    &out_cursor,
    &out_remaining
  );

  /* Report the number of bytes actually produced, whatever the outcome. */
  *dst_size -= out_remaining;

  if ( irreversible_count == (size_t) -1 ) {
    /*
     * iconv() has detected an error.  A too small output buffer is assumed
     * to be the most likely cause, so every error is reported as ENOMEM.
     */
    return ENOMEM;
  }

  if ( irreversible_count != 0 ) {
    /*
     * Some characters were converted in a non-reversible way.  Only
     * reversible conversions are accepted for DOSFS names.
     */
    return EINVAL;
  }

  return 0;
}
91
92static int msdos_utf8_codepage_to_utf8(
93  rtems_dosfs_convert_control *super,
94  const char                  *src,
95  size_t                       src_size,
96  uint8_t                     *dst,
97  size_t                      *dst_size
98)
99{
100  msdos_utf8_convert_control *self = (msdos_utf8_convert_control *) super;
101
102  return msdos_utf8_convert_with_iconv(
103    self->desc_codepage_to_utf8,
104    src,
105    src_size,
106    dst,
107    dst_size
108  );
109}
110
111static int msdos_utf8_utf8_to_codepage(
112  rtems_dosfs_convert_control *super,
113  const uint8_t               *src,
114  size_t                       src_size,
115  char                        *dst,
116  size_t                      *dst_size
117)
118{
119  msdos_utf8_convert_control *self = (msdos_utf8_convert_control *) super;
120
121  return msdos_utf8_convert_with_iconv(
122    self->desc_utf8_to_codepage,
123    src,
124    src_size,
125    dst,
126    dst_size
127  );
128}
129
130static int msdos_utf8_utf16_to_utf8(
131  rtems_dosfs_convert_control *super,
132  const uint16_t              *src,
133  size_t                       src_size,
134  uint8_t                     *dst,
135  size_t                      *dst_size
136)
137{
138  msdos_utf8_convert_control *self = (msdos_utf8_convert_control *) super;
139
140  return msdos_utf8_convert_with_iconv(
141    self->desc_utf16_to_utf8,
142    src,
143    src_size,
144    dst,
145    dst_size
146  );
147}
148
149static int msdos_utf8_utf8_to_utf16(
150  rtems_dosfs_convert_control *super,
151  const uint8_t               *src,
152  size_t                       src_size,
153  uint16_t                    *dst,
154  size_t                      *dst_size
155)
156{
157  msdos_utf8_convert_control *self = (msdos_utf8_convert_control *) super;
158
159  return msdos_utf8_convert_with_iconv(
160    self->desc_utf8_to_utf16,
161    src,
162    src_size,
163    dst,
164    dst_size
165  );
166}
167
168static int msdos_utf8proc_errmsg_to_errno( ssize_t errcode )
169{
170  int eno = 0;
171
172
173  switch ( errcode ) {
174    case 0:
175      eno = 0;
176      break;
177    case UTF8PROC_ERROR_NOMEM:
178      eno = ENOMEM;
179      break;
180    case UTF8PROC_ERROR_OVERFLOW:
181      eno = EOVERFLOW;
182      break;
183    case UTF8PROC_ERROR_INVALIDUTF8:
184      eno = EINVAL;
185      break;
186    case UTF8PROC_ERROR_NOTASSIGNED:
187      eno = EINVAL;
188      break;
189    case UTF8PROC_ERROR_INVALIDOPTS:
190      eno = EINVAL;
191      break;
192    default:
193      eno = ENOENT;
194      break;
195  }
196
197  return eno;
198}
199
200static int msdos_utf8_normalize_and_fold(
201  rtems_dosfs_convert_control *super,
202  const uint8_t *src,
203  const size_t   src_size,
204  uint8_t       *dst,
205  size_t        *dst_size
206)
207{
208  int      eno              = 0;
209  int32_t *unicode_buf      = (int32_t *) dst;
210  ssize_t  unicode_buf_size = *dst_size / sizeof( *unicode_buf );
211  ssize_t  unicodes_to_reencode;
212  ssize_t  result;
213
214  (void) super;
215
216  result = utf8proc_decompose(
217    src,
218    (ssize_t) src_size,
219    unicode_buf,
220    unicode_buf_size,
221    UTF8PROC_STABLE | UTF8PROC_DECOMPOSE | UTF8PROC_CASEFOLD
222  );
223
224  if ( result >= 0 ) {
225    if ( result < unicode_buf_size ) {
226      unicodes_to_reencode = result;
227    } else {
228      unicodes_to_reencode = unicode_buf_size - 1;
229      eno = ENOMEM;
230    }
231
232    result = utf8proc_reencode(
233      unicode_buf,
234      unicodes_to_reencode,
235      UTF8PROC_STABLE | UTF8PROC_DECOMPOSE
236    );
237
238    if ( result >= 0 ) {
239      *dst_size = result;
240    } else {
241      eno = msdos_utf8proc_errmsg_to_errno( result );
242    }
243  } else {
244    eno = msdos_utf8proc_errmsg_to_errno( result );
245  }
246
247  return eno;
248}
249
250static void msdos_utf8_destroy(
251  rtems_dosfs_convert_control *super
252)
253{
254  msdos_utf8_convert_control *self = (msdos_utf8_convert_control *) super;
255  int                         rv;
256
257  if ( self->desc_utf16_to_utf8 != INVALID_ICONV_DESC ) {
258    rv = iconv_close( self->desc_utf16_to_utf8 );
259    assert( rv == 0 );
260  }
261
262  if ( self->desc_codepage_to_utf8 != INVALID_ICONV_DESC ) {
263    rv = iconv_close( self->desc_codepage_to_utf8 );
264    assert( rv == 0 );
265  }
266
267  if ( self->desc_utf8_to_codepage != INVALID_ICONV_DESC ) {
268    rv = iconv_close( self->desc_utf8_to_codepage );
269    assert( rv == 0 );
270  }
271
272  if ( self->desc_utf8_to_utf16 != INVALID_ICONV_DESC ) {
273    rv = iconv_close( self->desc_utf8_to_utf16 );
274    assert( rv == 0 );
275  }
276
277  free( self );
278}
279
280static const rtems_dosfs_convert_handler msdos_utf8_convert_handler = {
281  .utf8_to_codepage = msdos_utf8_utf8_to_codepage,
282  .codepage_to_utf8 = msdos_utf8_codepage_to_utf8,
283  .utf8_to_utf16 = msdos_utf8_utf8_to_utf16,
284  .utf16_to_utf8 = msdos_utf8_utf16_to_utf8,
285  .utf8_normalize_and_fold = msdos_utf8_normalize_and_fold,
286  .destroy = msdos_utf8_destroy
287};
288
289rtems_dosfs_convert_control *rtems_dosfs_create_utf8_converter(
290  const char *codepage
291)
292{
293  msdos_utf8_convert_control *self = malloc( sizeof( *self ) );
294
295  if ( self != NULL ) {
296    self->desc_codepage_to_utf8 = iconv_open( "UTF-8", codepage );
297    self->desc_utf8_to_codepage = iconv_open( codepage, "UTF-8" );
298    self->desc_utf16_to_utf8    = iconv_open( "UTF-8", "UTF-16LE" );
299    self->desc_utf8_to_utf16    = iconv_open( "UTF-16LE", "UTF-8" );
300
301    if (
302      self->desc_utf16_to_utf8 != INVALID_ICONV_DESC
303        && self->desc_utf8_to_codepage != INVALID_ICONV_DESC
304        && self->desc_codepage_to_utf8 != INVALID_ICONV_DESC
305        && self->desc_utf8_to_utf16 != INVALID_ICONV_DESC
306    ) {
307      rtems_dosfs_convert_control *super = &self->super;
308
309      super->handler = &msdos_utf8_convert_handler;
310      super->buffer.data = &self->buffer;
311      super->buffer.size = sizeof( self->buffer );
312    } else {
313      msdos_utf8_destroy( &self->super );
314      self = NULL;
315    }
316  }
317
318  return &self->super;
319}
Note: See TracBrowser for help on using the repository browser.