source: rtems/cpukit/libmisc/untar/untar.c @ ffc57e3

Last change on this file since ffc57e3 was ffc57e3, checked in by Christian Mauderer <christian.mauderer@…>, on 12/01/21 at 15:39:46

untar: Make behavior similar to GNU or BSD tar

RTEMS untar implementation had problems with overwriting or integrating
archives into existing directory structures. This patch adapts the
behavior to mimic that of a GNU tar or BSD tar and extends the tar01
test to check for the behavior. That is:

  • If a directory structure exists, the files from the archive will be integrated. Existing files are overwritten.
  • If a file exists and the archive contains a directory with the same name, the file is removed and a directory is created. In the above example: if l1/l2 is a file it will be overwritten with a new directory.
  • If a directory exists and the archive contains a file with the same name, the directory will be replaced if it is empty. If it contains files, the result is an error.
  • An archive also can contain only a file without the parent directories. If in that case one of the parent directories exists as a file extracting the archive results in an error. In the example: if l1/l2 is a file and the archive doesn't contain the directories but only the file l1/l2/x.txt that would be an error.
  • In case of an error, it is possible that the archive has been partially extracted.

Closes #4568

  • Property mode set to 100644
File size: 14.5 KB
Line 
1/**
2 * @file
3 *
4 * @brief Untar an Image
5 *
6 * @ingroup libmisc_untar_img Untar Image
7 */
8
9/*
10 *  Written by: Jake Janovetz <janovetz@tempest.ece.uiuc.edu>
11 *
12 *  Copyright 2016 Chris Johns <chrisj@rtems.org>
13 *
14 *  The license and distribution terms for this file may be
15 *  found in the file LICENSE in this distribution or at
16 *  http://www.rtems.org/license/LICENSE.
17 */
18
19#ifdef HAVE_CONFIG_H
20#include "config.h"
21#endif
22
23#include <stdbool.h>
24#include <sys/param.h>
25#include <stdio.h>
26#include <string.h>
27#include <stdlib.h>
28#include <unistd.h>
29#include <errno.h>
30#include <sys/stat.h>
31#include <fcntl.h>
32#include <rtems/untar.h>
33#include <rtems/bspIo.h>
34
35/*
36 * TAR file format:
37
38 *   Offset   Length   Contents
39 *     0    100 bytes  File name ('\0' terminated, 99 maximum length)
40 *   100      8 bytes  File mode (in octal ascii)
41 *   108      8 bytes  User ID (in octal ascii)
42 *   116      8 bytes  Group ID (in octal ascii)
43 *   124     12 bytes  File size (s) (in octal ascii)
44 *   136     12 bytes  Modify time (in octal ascii)
45 *   148      8 bytes  Header checksum (in octal ascii)
46 *   156      1 bytes  Link flag
47 *   157    100 bytes  Linkname ('\0' terminated, 99 maximum length)
48 *   257      8 bytes  Magic PAX ("ustar\0" + 2 bytes padding)
49 *   257      8 bytes  Magic GNU tar ("ustar  \0")
50 *   265     32 bytes  User name ('\0' terminated, 31 maximum length)
51 *   297     32 bytes  Group name ('\0' terminated, 31 maximum length)
52 *   329      8 bytes  Major device ID (in octal ascii)
53 *   337      8 bytes  Minor device ID (in octal ascii)
54 *   345    155 bytes  Prefix
55 *   512   (s+p)bytes  File contents (s+p) := (((s) + 511) & ~511),
56 *                     round up to 512 bytes
57 *
58 *   Checksum:
59 *   int i, sum;
60 *   char* header = tar_header_pointer;
61 *   sum = 0;
62 *   for(i = 0; i < 512; i++)
63 *       sum += 0xFF & header[i];
64 */
65
66#define MAX_NAME_FIELD_SIZE      99
67
68static int _rtems_tar_header_checksum(const char *bufr);
69
/*
 * Convert an octal ASCII field of a TAR header into an unsigned long.
 *
 * Exactly len bytes starting at octascii are examined.  Every character that
 * is not an octal digit ('0' to '7') is skipped, which covers the blanks and
 * NUL characters used to pad and terminate the numeric header fields.
 *
 * No overflow check is done: a field with more significant digits than fit
 * into an unsigned long wraps around.
 *
 * Returns the decoded value, or 0 if the field contains no octal digit.
 */
static unsigned long
_rtems_octal2ulong(
  const char *octascii,
  size_t len
)
{
  size_t        i;
  unsigned long num;

  num = 0;
  for (i = 0; i < len; i++) {
    /* '8' and '9' are not octal digits and must not be accumulated */
    if ((octascii[i] < '0') || (octascii[i] > '7')) {
      continue;
    }
    num = num * 8 + ((unsigned long)(octascii[i] - '0'));
  }
  return num;
}
92
93/*
94 * Common error message formatter.
95 */
96static void
97Print_Error(const rtems_printer *printer, const char* message, const char* path)
98{
99  rtems_printf(printer, "untar: %s: %s: (%d) %s\n",
100               message, path, errno, strerror(errno));
101}
102
103/*
104 * Make the directory path for a file if it does not exist.
105 */
106static int
107Make_Path(const rtems_printer *printer, char *path)
108{
109  char *p;
110
111  /*
112   * Skip leading path separators.
113   */
114  while (*path == '/') {
115    ++path;
116  }
117
118  p = path;
119
120  for (; ; ++p) {
121    if (p[0] == '\0') {
122      return 0;
123    } else if (p[0] != '/') {
124      continue;
125    }
126
127    *p = '\0';
128    if (p[1] == '\0') {
129      return 0;
130    }
131
132    if (mkdir(path, S_IRWXU | S_IRWXG | S_IRWXO) != 0) {
133      if (errno == EEXIST) {
134        /* If it exists already: Check whether it is a directory */
135        struct stat sb;
136        if (lstat(path, &sb) != 0) {
137          Print_Error(printer, "lstat", path);
138          return -1;
139        } else if (!S_ISDIR(sb.st_mode)) {
140          rtems_printf(printer,
141                       "untar: mkdir: %s: exists but is not a directory\n",
142                       path);
143          return -1;
144        }
145      } else {
146        Print_Error(printer, "mkdir", path);
147        return -1;
148      }
149    }
150
151    *p = '/';
152  }
153
154  return 0;
155}
156
157int
158Untar_ProcessHeader(
159  Untar_HeaderContext *ctx,
160  const char          *bufr
161)
162{
163  int sum;
164  int hdr_chksum;
165  int retval = UNTAR_SUCCESSFUL;
166  int r;
167
168  ctx->file_name[0] = '\0';
169  ctx->file_size = 0;
170  ctx->nblocks = 0;
171  ctx->linkflag = -1;
172
173  if (strncmp(&bufr[257], "ustar", 5)) {
174    return UNTAR_SUCCESSFUL;
175  }
176
177  /*
178   * Compute the TAR checksum and check with the value in the archive.  The
179   * checksum is computed over the entire header, but the checksum field is
180   * substituted with blanks.
181   */
182  hdr_chksum = _rtems_octal2ulong(&bufr[148], 8);
183  sum        = _rtems_tar_header_checksum(bufr);
184
185  if (sum != hdr_chksum) {
186    rtems_printf(ctx->printer, "untar: file header checksum error\n");
187    return UNTAR_INVALID_CHECKSUM;
188  }
189
190  strlcpy(ctx->file_name, bufr, UNTAR_FILE_NAME_SIZE);
191
192  ctx->mode = strtoul(&bufr[100], NULL, 8);
193
194  ctx->linkflag   = bufr[156];
195  ctx->file_size = _rtems_octal2ulong(&bufr[124], 12);
196
197  /*
198   * We've decoded the header, now figure out what it contains and do something
199   * with it.
200   */
201
202  if (Make_Path(ctx->printer, ctx->file_path) != 0) {
203    retval = UNTAR_FAIL;
204  } else {
205    /*
206     * Speculatively unlink. This should unlink everything but non-empty
207     * directories or write protected stuff.
208     */
209    unlink(ctx->file_path);
210  }
211
212  if (ctx->linkflag == SYMTYPE) {
213    strlcpy(ctx->link_name, &bufr[157], sizeof(ctx->link_name));
214    rtems_printf(ctx->printer, "untar: symlink: %s -> %s\n",
215                 ctx->link_name, ctx->file_path);
216    r = symlink(ctx->link_name, ctx->file_path);
217    if (r != 0) {
218      Print_Error(ctx->printer, "symlink", ctx->file_path);
219      retval = UNTAR_FAIL;
220    }
221  } else if (ctx->linkflag == REGTYPE) {
222    rtems_printf(ctx->printer, "untar: file: %s (s:%lu,m:%04lo)\n",
223                 ctx->file_path, ctx->file_size, ctx->mode);
224    ctx->nblocks = (((ctx->file_size) + 511) & ~511) / 512;
225  } else if (ctx->linkflag == DIRTYPE) {
226    rtems_printf(ctx->printer, "untar: dir: %s\n", ctx->file_path);
227    r = mkdir(ctx->file_path, ctx->mode);
228    if (r != 0) {
229      if (errno == EEXIST) {
230        /* If it exists already: Check whether it is a directory */
231        struct stat sb;
232        if (lstat(ctx->file_path, &sb) != 0) {
233          Print_Error(ctx->printer, "lstat", ctx->file_path);
234          retval = UNTAR_FAIL;
235        } else if (!S_ISDIR(sb.st_mode)) {
236          rtems_printf(ctx->printer,
237                       "untar: mkdir: %s: exists but is not a directory\n",
238                       ctx->file_path);
239          retval = UNTAR_FAIL;
240        }
241      } else {
242        Print_Error(ctx->printer, "mkdir", ctx->file_path);
243        retval = UNTAR_FAIL;
244      }
245    }
246  }
247
248  return retval;
249}
250
251/*
252 * Function: Untar_FromMemory
253 *
254 * Description:
255 *
256 *    This is a simple subroutine used to rip links, directories, and
257 *    files out of a block of memory.
258 *
259 *
260 * Inputs:
261 *
262 *    void *  tar_buf    - Pointer to TAR buffer.
263 *    size_t  size       - Length of TAR buffer.
264 *
265 *
266 * Output:
267 *
268 *    int - UNTAR_SUCCESSFUL (0)    on successful completion.
269 *          UNTAR_INVALID_CHECKSUM  for an invalid header checksum.
270 *          UNTAR_INVALID_HEADER    for an invalid header.
271 *
272 */
273int
274Untar_FromMemory_Print(
275  void                *tar_buf,
276  size_t               size,
277  const rtems_printer *printer
278)
279{
280  int                  fd;
281  const char          *tar_ptr = (const char *)tar_buf;
282  const char          *bufr;
283  char                 buf[UNTAR_FILE_NAME_SIZE];
284  Untar_HeaderContext  ctx;
285  int                  retval = UNTAR_SUCCESSFUL;
286  unsigned long        ptr;
287
288  ctx.file_path = buf;
289  ctx.file_name = buf;
290  ctx.printer = printer;
291  rtems_printf(printer, "untar: memory at %p (%zu)\n", tar_buf, size);
292
293  ptr = 0;
294  while (true) {
295    if (ptr + 512 > size) {
296      retval = UNTAR_SUCCESSFUL;
297      break;
298    }
299
300    /* Read the header */
301    bufr = &tar_ptr[ptr];
302    ptr += 512;
303
304    retval = Untar_ProcessHeader(&ctx, bufr);
305
306    if (retval != UNTAR_SUCCESSFUL)
307      break;
308
309    if (ctx.linkflag == REGTYPE) {
310      if ((fd = open(ctx.file_path,
311                     O_TRUNC | O_CREAT | O_WRONLY, ctx.mode)) == -1) {
312        Print_Error(printer, "open", ctx.file_path);
313        ptr += 512 * ctx.nblocks;
314      } else {
315        unsigned long sizeToGo = ctx.file_size;
316        ssize_t       len;
317        ssize_t       i;
318        ssize_t       n;
319
320        /*
321         * Read out the data.  There are nblocks of data where nblocks is the
322         * file_size rounded to the nearest 512-byte boundary.
323         */
324        for (i = 0; i < ctx.nblocks; i++) {
325          len = ((sizeToGo < 512L) ? (sizeToGo) : (512L));
326          n = write(fd, &tar_ptr[ptr], len);
327          if (n != len) {
328            Print_Error(printer, "write", ctx.file_path);
329            retval  = UNTAR_FAIL;
330            break;
331          }
332          ptr += 512;
333          sizeToGo -= n;
334        }
335        close(fd);
336      }
337
338    }
339  }
340
341  return retval;
342}
343
/*
 * Function: Untar_FromMemory
 *
 * Description:
 *
 *    Extract the links, directories, and files of a TAR image which is
 *    located in memory.  This is Untar_FromMemory_Print() called with a
 *    NULL printer.
 *
 * Inputs:
 *
 *    void *  tar_buf    - Pointer to TAR buffer.
 *    size_t  size       - Length of TAR buffer.
 *
 * Output:
 *
 *    int - The status returned by Untar_FromMemory_Print().
 *
 */
int
Untar_FromMemory(
  void   *tar_buf,
  size_t  size
)
{
  /* The printer is a pointer: pass NULL, not the boolean false */
  return Untar_FromMemory_Print(tar_buf, size, NULL);
}
374
375/*
376 * Function: Untar_FromFile
377 *
378 * Description:
379 *
380 *    This is a simple subroutine used to rip links, directories, and
381 *    files out of a TAR file.
382 *
383 * Inputs:
384 *
385 *    const char *tar_name   - TAR filename.
386 *
387 * Output:
388 *
389 *    int - UNTAR_SUCCESSFUL (0)    on successful completion.
390 *          UNTAR_INVALID_CHECKSUM  for an invalid header checksum.
391 *          UNTAR_INVALID_HEADER    for an invalid header.
392 */
393int
394Untar_FromFile_Print(
395  const char          *tar_name,
396  const rtems_printer *printer
397)
398{
399  int                  fd;
400  char                *bufr;
401  ssize_t              n;
402  int                  retval;
403  unsigned long        i;
404  char                 buf[UNTAR_FILE_NAME_SIZE];
405  Untar_HeaderContext  ctx;
406
407  retval = UNTAR_SUCCESSFUL;
408
409  if ((fd = open(tar_name, O_RDONLY)) < 0) {
410    return UNTAR_FAIL;
411  }
412
413  bufr = (char *)malloc(512);
414  if (bufr == NULL) {
415    close(fd);
416    return(UNTAR_FAIL);
417  }
418
419  ctx.file_path = buf;
420  ctx.file_name = buf;
421  ctx.printer = printer;
422
423  while (1) {
424    /* Read the header */
425    /* If the header read fails, we just consider it the end of the tarfile. */
426    if ((n = read(fd, bufr, 512)) != 512) {
427      break;
428    }
429
430    retval = Untar_ProcessHeader(&ctx, bufr);
431
432    if (retval != UNTAR_SUCCESSFUL)
433      break;
434
435    if (ctx.linkflag == REGTYPE) {
436      int out_fd;
437
438      /*
439       * Read out the data.  There are nblocks of data where nblocks
440       * is the size rounded to the nearest 512-byte boundary.
441       */
442
443      if ((out_fd = creat(ctx.file_path, ctx.mode)) == -1) {
444        /* Couldn't create that file. Abort. */
445        retval = UNTAR_FAIL;
446        break;
447      } else {
448        for (i = 0; i < ctx.nblocks; i++) {
449          n = read(fd, bufr, 512);
450          n = MIN(n, ctx.file_size - (i * 512UL));
451          (void) write(out_fd, bufr, n);
452        }
453        close(out_fd);
454      }
455    }
456  }
457
458  free(bufr);
459  close(fd);
460
461  return retval;
462}
463
464
465void Untar_ChunkContext_Init(Untar_ChunkContext *context)
466{
467  context->base.file_path = context->buf;
468  context->base.file_name = context->buf;
469  context->state = UNTAR_CHUNK_HEADER;
470  context->done_bytes = 0;
471  context->out_fd = -1;
472}
473
474int Untar_FromChunk_Print(
475  Untar_ChunkContext *context,
476  void *chunk,
477  size_t chunk_size,
478  const rtems_printer* printer
479)
480{
481  char *buf;
482  size_t done;
483  size_t todo;
484  size_t remaining;
485  size_t consume;
486  int retval;
487
488  buf = chunk;
489  done = 0;
490  todo = chunk_size;
491
492  context->base.printer = printer;
493
494  while (todo > 0) {
495    switch (context->state) {
496      case UNTAR_CHUNK_HEADER:
497        remaining = 512 - context->done_bytes;
498        consume = MIN(remaining, todo);
499        memcpy(&context->header[context->done_bytes], &buf[done], consume);
500        context->done_bytes += consume;
501
502        if (context->done_bytes == 512) {
503          retval = Untar_ProcessHeader(
504            &context->base,
505            &context->header[0]
506          );
507
508          if (retval != UNTAR_SUCCESSFUL) {
509            context->state = UNTAR_CHUNK_ERROR;
510            return retval;
511          }
512
513          if (context->base.linkflag == REGTYPE) {
514            context->out_fd = creat(context->base.file_path,
515                                    context->base.mode);
516
517            if (context->out_fd >= 0) {
518              context->state = UNTAR_CHUNK_WRITE;
519              context->done_bytes = 0;
520            } else {
521              context->state = UNTAR_CHUNK_SKIP;
522              context->base.file_size = 512 * context->base.nblocks;
523              context->done_bytes = 0;
524            }
525          } else {
526              context->done_bytes = 0;
527          }
528        }
529
530        break;
531      case UNTAR_CHUNK_SKIP:
532        remaining = context->base.file_size - context->done_bytes;
533        consume = MIN(remaining, todo);
534        context->done_bytes += consume;
535
536        if (context->done_bytes == context->base.file_size) {
537          context->state = UNTAR_CHUNK_HEADER;
538          context->done_bytes = 0;
539        }
540
541        break;
542      case UNTAR_CHUNK_WRITE:
543        remaining = context->base.file_size - context->done_bytes;
544        consume = MIN(remaining, todo);
545        write(context->out_fd, &buf[done], consume);
546        context->done_bytes += consume;
547
548        if (context->done_bytes == context->base.file_size) {
549          close(context->out_fd);
550          context->out_fd = -1;
551          context->state = UNTAR_CHUNK_SKIP;
552          context->base.file_size = 512 * context->base.nblocks
553            - context->base.file_size;
554          context->done_bytes = 0;
555        }
556
557        break;
558      default:
559        return UNTAR_FAIL;
560    }
561
562    done += consume;
563    todo -= consume;
564  }
565
566  return UNTAR_SUCCESSFUL;
567}
568
569/*
570 * Function: Untar_FromFile
571 *
572 * Description:
573 *
574 *    This is a simple subroutine used to rip links, directories, and
575 *    files out of a TAR file.
576 *
577 * Inputs:
578 *
579 *    const char *tar_name   - TAR filename.
580 *
581 * Output:
582 *
583 *    int - UNTAR_SUCCESSFUL (0)    on successful completion.
584 *          UNTAR_INVALID_CHECKSUM  for an invalid header checksum.
585 *          UNTAR_INVALID_HEADER    for an invalid header.
586 */
587int
588Untar_FromFile(
589  const char *tar_name
590)
591{
592  return Untar_FromFile_Print(tar_name, NULL);
593}
594
/*
 * Calculate the checksum of a 512 byte TAR header block.
 *
 * All bytes of the header are added up as unsigned values.  The eight bytes
 * of the checksum field itself (offsets 148 to 155) are not read, each of
 * them counts as a blank instead.
 */
static int
_rtems_tar_header_checksum(
  const char *bufr
)
{
  const unsigned char *bytes = (const unsigned char *) bufr;
  int                  total;
  int                  pos;

  /* The checksum field is taken as eight blanks */
  total = 8 * (0xff & ' ');

  /* Bytes in front of the checksum field */
  for (pos = 0; pos < 148; ++pos) {
    total += bytes[pos];
  }

  /* Bytes behind the checksum field */
  for (pos = 156; pos < 512; ++pos) {
    total += bytes[pos];
  }

  return total;
}
Note: See TracBrowser for help on using the repository browser.