source: rtems/cpukit/libfs/src/dosfs/msdos_conv_utf8.c @ a8007dc2

4.115
Last change on this file since a8007dc2 was a8007dc2, checked in by Gedare Bloom <gedare@…>, on 09/05/13 at 17:15:33

dosfs: Unsigned compared against 0

Change the type for storing the return from iconv to be signed.

  • Property mode set to 100644
File size: 6.9 KB
Line 
1/**
2 * @file
3 *
4 * @ingroup DOSFS
5 *
6 * @brief UTF-8 Converter
7 */
8
9/*
10 * Copyright (c) 2013 embedded brains GmbH.  All rights reserved.
11 *
12 *  embedded brains GmbH
13 *  Dornierstr. 4
14 *  82178 Puchheim
15 *  Germany
16 *  <rtems@embedded-brains.de>
17 *
18 * The license and distribution terms for this file may be
19 * found in the file LICENSE in this distribution or at
20 * http://www.rtems.com/license/LICENSE.
21 */
22
23#include <stddef.h>
24#include <assert.h>
25#include <errno.h>
26#include <iconv.h>
27#include <rtems/dosfs.h>
28#include <utf8proc/utf8proc.h>
29#include "msdos.h"
30
31#define INVALID_ICONV_DESC ( (iconv_t) -1 )
32
33typedef struct {
34  /*
35   * This structure must be the first field, since otherwise the cast
36   * operations later in the file are invalid.
37   */
38  rtems_dosfs_convert_control super;
39
40  iconv_t desc_codepage_to_utf8;
41  iconv_t desc_utf8_to_codepage;
42  iconv_t desc_utf16_to_utf8;
43  iconv_t desc_utf8_to_utf16;
44  uint8_t buffer[MSDOS_NAME_MAX_UTF8_LFN_BYTES];
45} msdos_utf8_convert_control;
46
/**
 * @brief Performs a single iconv() conversion from @a src into @a dst.
 *
 * @param desc     Open iconv conversion descriptor.
 * @param src      Input bytes; not modified.
 * @param src_size Number of input bytes.
 * @param dst      Output buffer.
 * @param dst_size On entry the capacity of @a dst in bytes, on return the
 *                 number of bytes written to @a dst (also updated on error).
 *
 * @retval 0      iconv() returned zero.
 * @retval EINVAL iconv() returned a positive count.
 * @retval ENOMEM iconv() reported failure (negative status).
 */
static int msdos_utf8_convert_with_iconv(
  iconv_t     desc,
  const void *src,
  size_t      src_size,
  void       *dst,
  size_t     *dst_size
)
{
  /* iconv() takes a non-const input pointer; drop the qualifier via uintptr_t */
  char   *in_pos = (void *) (uintptr_t) src;
  char   *out_pos = dst;
  size_t  in_remaining = src_size;
  size_t  out_remaining = *dst_size;
  /* Signed on purpose so that the (size_t) -1 failure value compares < 0 */
  ssize_t status = iconv(
    desc,
    &in_pos,
    &in_remaining,
    &out_pos,
    &out_remaining
  );

  /* Capacity minus unused space is the amount of output produced */
  *dst_size -= out_remaining;

  if ( status < 0 ) {
    return ENOMEM;
  }

  return ( status > 0 ) ? EINVAL : 0;
}
80
81static int msdos_utf8_codepage_to_utf8(
82  rtems_dosfs_convert_control *super,
83  const char                  *src,
84  size_t                       src_size,
85  uint8_t                     *dst,
86  size_t                      *dst_size
87)
88{
89  msdos_utf8_convert_control *self = (msdos_utf8_convert_control *) super;
90
91  return msdos_utf8_convert_with_iconv(
92    self->desc_codepage_to_utf8,
93    src,
94    src_size,
95    dst,
96    dst_size
97  );
98}
99
100static int msdos_utf8_utf8_to_codepage(
101  rtems_dosfs_convert_control *super,
102  const uint8_t               *src,
103  size_t                       src_size,
104  char                        *dst,
105  size_t                      *dst_size
106)
107{
108  msdos_utf8_convert_control *self = (msdos_utf8_convert_control *) super;
109
110  return msdos_utf8_convert_with_iconv(
111    self->desc_utf8_to_codepage,
112    src,
113    src_size,
114    dst,
115    dst_size
116  );
117}
118
119static int msdos_utf8_utf16_to_utf8(
120  rtems_dosfs_convert_control *super,
121  const uint16_t              *src,
122  size_t                       src_size,
123  uint8_t                     *dst,
124  size_t                      *dst_size
125)
126{
127  msdos_utf8_convert_control *self = (msdos_utf8_convert_control *) super;
128
129  return msdos_utf8_convert_with_iconv(
130    self->desc_utf16_to_utf8,
131    src,
132    src_size,
133    dst,
134    dst_size
135  );
136}
137
138static int msdos_utf8_utf8_to_utf16(
139  rtems_dosfs_convert_control *super,
140  const uint8_t               *src,
141  size_t                       src_size,
142  uint16_t                    *dst,
143  size_t                      *dst_size
144)
145{
146  msdos_utf8_convert_control *self = (msdos_utf8_convert_control *) super;
147
148  return msdos_utf8_convert_with_iconv(
149    self->desc_utf8_to_utf16,
150    src,
151    src_size,
152    dst,
153    dst_size
154  );
155}
156
157static int msdos_utf8proc_errmsg_to_errno( ssize_t errcode )
158{
159  int eno = 0;
160
161
162  switch ( errcode ) {
163    case 0:
164      eno = 0;
165      break;
166    case UTF8PROC_ERROR_NOMEM:
167      eno = ENOMEM;
168      break;
169    case UTF8PROC_ERROR_OVERFLOW:
170      eno = EOVERFLOW;
171      break;
172    case UTF8PROC_ERROR_INVALIDUTF8:
173      eno = EINVAL;
174      break;
175    case UTF8PROC_ERROR_NOTASSIGNED:
176      eno = EINVAL;
177      break;
178    case UTF8PROC_ERROR_INVALIDOPTS:
179      eno = EINVAL;
180      break;
181    default:
182      eno = ENOENT;
183      break;
184  }
185
186  return eno;
187}
188
189static int msdos_utf8_normalize_and_fold(
190  rtems_dosfs_convert_control *super,
191  const uint8_t *src,
192  const size_t   src_size,
193  uint8_t       *dst,
194  size_t        *dst_size
195)
196{
197  int      eno              = 0;
198  int32_t *unicode_buf      = (int32_t *) dst;
199  ssize_t  unicode_buf_size = *dst_size / sizeof( *unicode_buf );
200  ssize_t  unicodes_to_reencode;
201  ssize_t  result;
202
203  (void) super;
204
205  result = utf8proc_decompose(
206    src,
207    (ssize_t) src_size,
208    unicode_buf,
209    unicode_buf_size,
210    UTF8PROC_STABLE | UTF8PROC_DECOMPOSE | UTF8PROC_CASEFOLD
211  );
212
213  if ( result >= 0 ) {
214    if ( result < unicode_buf_size ) {
215      unicodes_to_reencode = result;
216    } else {
217      unicodes_to_reencode = unicode_buf_size - 1;
218      eno = ENOMEM;
219    }
220
221    result = utf8proc_reencode(
222      unicode_buf,
223      unicodes_to_reencode,
224      UTF8PROC_STABLE | UTF8PROC_DECOMPOSE
225    );
226
227    if ( result >= 0 ) {
228      *dst_size = result;
229    } else {
230      eno = msdos_utf8proc_errmsg_to_errno( result );
231    }
232  } else {
233    eno = msdos_utf8proc_errmsg_to_errno( result );
234  }
235
236  return eno;
237}
238
239static void msdos_utf8_destroy(
240  rtems_dosfs_convert_control *super
241)
242{
243  msdos_utf8_convert_control *self = (msdos_utf8_convert_control *) super;
244  int                         rv;
245
246  if ( self->desc_utf16_to_utf8 != INVALID_ICONV_DESC ) {
247    rv = iconv_close( self->desc_utf16_to_utf8 );
248    assert( rv == 0 );
249  }
250
251  if ( self->desc_codepage_to_utf8 != INVALID_ICONV_DESC ) {
252    rv = iconv_close( self->desc_codepage_to_utf8 );
253    assert( rv == 0 );
254  }
255
256  if ( self->desc_utf8_to_codepage != INVALID_ICONV_DESC ) {
257    rv = iconv_close( self->desc_utf8_to_codepage );
258    assert( rv == 0 );
259  }
260
261  if ( self->desc_utf8_to_utf16 != INVALID_ICONV_DESC ) {
262    rv = iconv_close( self->desc_utf8_to_utf16 );
263    assert( rv == 0 );
264  }
265
266  free( self );
267}
268
269static const rtems_dosfs_convert_handler msdos_utf8_convert_handler = {
270  .utf8_to_codepage = msdos_utf8_utf8_to_codepage,
271  .codepage_to_utf8 = msdos_utf8_codepage_to_utf8,
272  .utf8_to_utf16 = msdos_utf8_utf8_to_utf16,
273  .utf16_to_utf8 = msdos_utf8_utf16_to_utf8,
274  .utf8_normalize_and_fold = msdos_utf8_normalize_and_fold,
275  .destroy = msdos_utf8_destroy
276};
277
278rtems_dosfs_convert_control *rtems_dosfs_create_utf8_converter(
279  const char *codepage
280)
281{
282  msdos_utf8_convert_control *self = malloc( sizeof( *self ) );
283
284  if ( self != NULL ) {
285    self->desc_codepage_to_utf8 = iconv_open( "UTF-8", codepage );
286    self->desc_utf8_to_codepage = iconv_open( codepage, "UTF-8" );
287    self->desc_utf16_to_utf8    = iconv_open( "UTF-8", "UTF-16LE" );
288    self->desc_utf8_to_utf16    = iconv_open( "UTF-16LE", "UTF-8" );
289
290    if (
291      self->desc_utf16_to_utf8 != INVALID_ICONV_DESC
292        && self->desc_utf8_to_codepage != INVALID_ICONV_DESC
293        && self->desc_codepage_to_utf8 != INVALID_ICONV_DESC
294        && self->desc_utf8_to_utf16 != INVALID_ICONV_DESC
295    ) {
296      rtems_dosfs_convert_control *super = &self->super;
297
298      super->handler = &msdos_utf8_convert_handler;
299      super->buffer.data = &self->buffer;
300      super->buffer.size = sizeof( self->buffer );
301    } else {
302      msdos_utf8_destroy( &self->super );
303      self = NULL;
304    }
305  }
306
307  return &self->super;
308}
Note: See TracBrowser for help on using the repository browser.