source: rtems/cpukit/libmisc/untar/untar.c @ b6f66d9

5
Last change on this file since b6f66d9 was b6f66d9, checked in by Sebastian Huber <sebastian.huber@…>, on 11/21/19 at 07:06:28

untar: Unify untar support

Update #3823.

  • Property mode set to 100644
File size: 16.5 KB
Line 
1/**
2 * @file
3
4 * @brief Untar an Image
5 * @ingroup libmisc_untar_img Untar Image
6
7 * FIXME:
8 *   1. Symbolic links are not created.
9 *   2. Untar_FromMemory uses FILE *fp.
10 *   3. How to determine end of archive?
11
12 */
13
14/*
15 *  Written by: Jake Janovetz <janovetz@tempest.ece.uiuc.edu>
16 *
17 *  Copyright 2016 Chris Johns <chrisj@rtems.org>
18 *
19 *  The license and distribution terms for this file may be
20 *  found in the file LICENSE in this distribution or at
21 *  http://www.rtems.org/license/LICENSE.
22 */
23
24#ifdef HAVE_CONFIG_H
25#include "config.h"
26#endif
27
28#include <stdbool.h>
29#include <sys/param.h>
30#include <stdio.h>
31#include <string.h>
32#include <stdlib.h>
33#include <unistd.h>
34#include <errno.h>
35#include <sys/stat.h>
36#include <fcntl.h>
37#include <rtems/untar.h>
38#include <rtems/bspIo.h>
39
/*
 * TAR file format:
 *
 *   Offset   Length   Contents
 *     0    100 bytes  File name ('\0' terminated, 99 maximum length)
 *   100      8 bytes  File mode (in octal ascii)
 *   108      8 bytes  User ID (in octal ascii)
 *   116      8 bytes  Group ID (in octal ascii)
 *   124     12 bytes  File size (s) (in octal ascii)
 *   136     12 bytes  Modify time (in octal ascii)
 *   148      8 bytes  Header checksum (in octal ascii)
 *   156      1 bytes  Link flag
 *   157    100 bytes  Linkname ('\0' terminated, 99 maximum length)
 *   257      8 bytes  Magic PAX ("ustar\0" + 2 bytes padding)
 *   257      8 bytes  Magic GNU tar ("ustar  \0")
 *   265     32 bytes  User name ('\0' terminated, 31 maximum length)
 *   297     32 bytes  Group name ('\0' terminated, 31 maximum length)
 *   329      8 bytes  Major device ID (in octal ascii)
 *   337      8 bytes  Minor device ID (in octal ascii)
 *   345    155 bytes  Prefix
 *   512   (s+p)bytes  File contents (s+p) := (((s) + 511) & ~511),
 *                     round up to 512 bytes
 *
 *   Checksum:
 *   int i, sum;
 *   char* header = tar_header_pointer;
 *   sum = 0;
 *   for(i = 0; i < 512; i++)
 *       sum += 0xFF & header[i];
 */
70
71#define MAX_NAME_FIELD_SIZE      99
72
73static int _rtems_tar_header_checksum(const char *bufr);
74
/*
 * Convert an octal ASCII field of a tar header into an unsigned long.
 *
 * The numeric fields of a tar header are padded with spaces and/or NUL
 * characters, so every character that is not an octal digit ('0'..'7') is
 * skipped instead of being treated as an error.  No overflow check is made.
 *
 * octascii  - Start of the field (does not need to be NUL terminated).
 * len       - Number of characters in the field.
 *
 * Returns the decoded value, 0 if the field contains no octal digit.
 */
static unsigned long
_rtems_octal2ulong(
  const char *octascii,
  size_t len
)
{
  size_t        i;
  unsigned long num;

  num = 0;
  for (i = 0; i < len; i++) {
    /* '8' and '9' are not octal digits and must not contribute. */
    if ((octascii[i] < '0') || (octascii[i] > '7')) {
      continue;
    }
    num = num * 8 + ((unsigned long)(octascii[i] - '0'));
  }
  return num;
}
97
98/*
99 * Common error message formatter.
100 */
101static void
102Print_Error(const rtems_printer *printer, const char* message, const char* path)
103{
104  rtems_printf(printer, "untar: %s: %s: (%d) %s\n",
105               message, path, errno, strerror(errno));
106}
107
/*
 * Classify the file system node at path.
 *
 * Returns DIRTYPE if the node is a directory, REGTYPE for any other kind of
 * node and -1 if stat() fails (errno is left as set by stat()).
 */
static int
Stat_Node(const char* path)
{
  struct stat info;
  int         status = stat(path, &info);

  if (status < 0) {
    return -1;
  }

  return S_ISDIR(info.st_mode) ? DIRTYPE : REGTYPE;
}
121
122/*
123 * Make the directory path for a file if it does not exist.
124 */
125static int
126Make_Path(const rtems_printer *printer, const char* filename, bool end_is_dir)
127{
128  char* copy = strdup(filename);
129  char* path = copy;
130
131  /*
132   * Skip leading path separators.
133   */
134  while (*path == '/')
135    ++path;
136
137  /*
138   * Any path left?
139   */
140  if (*path != '\0') {
141    bool  path_end = false;
142    char* end = path;
143    int   r;
144
145    /*
146     * Split the path into directory components. Check the node and if a file
147     * and not the end of the path remove it and create a directory. If a
148     * directory and not the end of the path decend into the directory.
149     */
150    while (!path_end) {
151      while (*end != '\0' && *end != '/')
152        ++end;
153
154      /*
155       * Are we at the end of the path?
156       */
157      if (*end == '\0')
158        path_end = true;
159
160      /*
161       * Split the path.
162       */
163      *end = '\0';
164
165      /*
166       * Get the node's status, exists, error, directory or regular? Regular
167       * means not a directory.
168       */
169      r = Stat_Node(path);
170
171      /*
172       * If there are errors other than not existing we are finished.
173       */
174      if (r < 0 && errno != ENOENT) {
175        Print_Error(printer, "stat", path);
176        return -1;
177      }
178
179      /*
180       * If a file remove and create a directory if not the end.
181       */
182      if (r == REGTYPE) {
183        r = unlink(path);
184        if (r < 0) {
185          Print_Error(printer, "unlink", path);
186          free(copy);
187          return -1;
188        }
189        if (!path_end) {
190          r = mkdir(path, S_IRWXU | S_IRWXG | S_IRWXO);
191          if (r < 0) {
192            Print_Error(printer, "mkdir (unlink)", path);
193            free(copy);
194            return -1;
195          }
196        }
197      }
198      else if (r < 0) {
199        /*
200         * Node does not exist which means the rest of the path will not exist.
201         */
202        while (!path_end) {
203          r = mkdir(path, S_IRWXU | S_IRWXG | S_IRWXO);
204          if (r < 0) {
205            Print_Error(printer, "mkdir", path);
206            free(copy);
207            return -1;
208          }
209          if (!path_end) {
210            *end = '/';
211            ++end;
212          }
213          while (*end != '\0' && *end != '/')
214            ++end;
215          if (*end == '\0')
216            path_end = true;
217        }
218      }
219      else if (path_end && r == DIRTYPE && !end_is_dir) {
220        /*
221         * We only handle a directory if at the end of the path and the end is
222         * a file. If we cannot remove the directory because it is not empty we
223         * raise an error. Otherwise this is a directory and we do nothing
224         * which lets us decend into it.
225         */
226        r = rmdir(path);
227        if (r < 0) {
228          Print_Error(printer, "rmdir", path);
229          free(copy);
230          return -1;
231        }
232      }
233
234      /*
235       * If not the end of the path put back the directory separator.
236       */
237      if (!path_end) {
238        *end = '/';
239        ++end;
240      }
241    }
242  }
243
244  free(copy);
245
246  return 0;
247}
248
249int
250Untar_ProcessHeader(
251  Untar_HeaderContext *ctx,
252  const char          *bufr
253)
254{
255  int            sum;
256  int            hdr_chksum;
257  int            retval = UNTAR_SUCCESSFUL;
258
259  ctx->file_name[0] = '\0';
260  ctx->file_size = 0;
261  ctx->nblocks = 0;
262  ctx->linkflag = -1;
263
264  if (strncmp(&bufr[257], "ustar", 5)) {
265    return UNTAR_SUCCESSFUL;
266  }
267
268  /*
269   * Compute the TAR checksum and check with the value in the archive.  The
270   * checksum is computed over the entire header, but the checksum field is
271   * substituted with blanks.
272   */
273  hdr_chksum = _rtems_octal2ulong(&bufr[148], 8);
274  sum        = _rtems_tar_header_checksum(bufr);
275
276  if (sum != hdr_chksum) {
277    rtems_printf(ctx->printer, "untar: file header checksum error\n");
278    return UNTAR_INVALID_CHECKSUM;
279  }
280
281  strlcpy(ctx->file_name, bufr, UNTAR_FILE_NAME_SIZE);
282
283  ctx->mode = strtoul(&bufr[100], NULL, 8);
284
285  ctx->linkflag   = bufr[156];
286  ctx->file_size = _rtems_octal2ulong(&bufr[124], 12);
287
288  /*
289   * We've decoded the header, now figure out what it contains and do something
290   * with it.
291   */
292  if (ctx->linkflag == SYMTYPE) {
293    strlcpy(ctx->link_name, &bufr[157], sizeof(ctx->link_name));
294    rtems_printf(ctx->printer, "untar: symlink: %s -> %s\n",
295                 ctx->link_name, ctx->file_path);
296    symlink(ctx->link_name, ctx->file_path);
297  } else if (ctx->linkflag == REGTYPE) {
298    rtems_printf(ctx->printer, "untar: file: %s (s:%lu,m:%04lo)\n",
299                 ctx->file_path, ctx->file_size, ctx->mode);
300    ctx->nblocks = (((ctx->file_size) + 511) & ~511) / 512;
301    if (Make_Path(ctx->printer, ctx->file_path, false) < 0) {
302      retval  = UNTAR_FAIL;
303    }
304  } else if (ctx->linkflag == DIRTYPE) {
305    int r;
306    rtems_printf(ctx->printer, "untar:  dir: %s\n", ctx->file_path);
307    if (Make_Path(ctx->printer, ctx->file_path, true) < 0) {
308      retval  = UNTAR_FAIL;
309    }
310    r = mkdir(ctx->file_path, S_IRWXU | S_IRWXG | S_IRWXO);
311    if (r < 0) {
312      if (errno == EEXIST) {
313        struct stat stat_buf;
314        if (stat(ctx->file_path, &stat_buf) == 0) {
315          if (S_ISDIR(stat_buf.st_mode)) {
316            r = 0;
317          } else {
318            r = unlink(ctx->file_path);
319            if (r == 0) {
320              r = mkdir(ctx->file_path, ctx->mode);
321            }
322          }
323        }
324      }
325      if (r < 0) {
326        Print_Error(ctx->printer, "mkdir", ctx->file_path);
327        retval = UNTAR_FAIL;
328      }
329    }
330  }
331
332  return retval;
333}
334
335/*
336 * Function: Untar_FromMemory
337 *
338 * Description:
339 *
340 *    This is a simple subroutine used to rip links, directories, and
341 *    files out of a block of memory.
342 *
343 *
344 * Inputs:
345 *
346 *    void *  tar_buf    - Pointer to TAR buffer.
347 *    size_t  size       - Length of TAR buffer.
348 *
349 *
350 * Output:
351 *
352 *    int - UNTAR_SUCCESSFUL (0)    on successful completion.
353 *          UNTAR_INVALID_CHECKSUM  for an invalid header checksum.
354 *          UNTAR_INVALID_HEADER    for an invalid header.
355 *
356 */
357int
358Untar_FromMemory_Print(
359  void                *tar_buf,
360  size_t               size,
361  const rtems_printer *printer
362)
363{
364  int                  fd;
365  const char          *tar_ptr = (const char *)tar_buf;
366  const char          *bufr;
367  char                 buf[UNTAR_FILE_NAME_SIZE];
368  Untar_HeaderContext  ctx;
369  int                  retval = UNTAR_SUCCESSFUL;
370  unsigned long        ptr;
371
372  ctx.file_path = buf;
373  ctx.file_name = buf;
374  ctx.printer = printer;
375  rtems_printf(printer, "untar: memory at %p (%zu)\n", tar_buf, size);
376
377  ptr = 0;
378  while (true) {
379    if (ptr + 512 > size) {
380      retval = UNTAR_SUCCESSFUL;
381      break;
382    }
383
384    /* Read the header */
385    bufr = &tar_ptr[ptr];
386    ptr += 512;
387
388    retval = Untar_ProcessHeader(&ctx, bufr);
389
390    if (retval != UNTAR_SUCCESSFUL)
391      break;
392
393    if (ctx.linkflag == REGTYPE) {
394      if ((fd = open(ctx.file_path,
395                     O_TRUNC | O_CREAT | O_WRONLY, ctx.mode)) == -1) {
396        Print_Error(printer, "open", ctx.file_path);
397        ptr += 512 * ctx.nblocks;
398      } else {
399        unsigned long sizeToGo = ctx.file_size;
400        ssize_t       len;
401        ssize_t       i;
402        ssize_t       n;
403
404        /*
405         * Read out the data.  There are nblocks of data where nblocks is the
406         * file_size rounded to the nearest 512-byte boundary.
407         */
408        for (i = 0; i < ctx.nblocks; i++) {
409          len = ((sizeToGo < 512L) ? (sizeToGo) : (512L));
410          n = write(fd, &tar_ptr[ptr], len);
411          if (n != len) {
412            Print_Error(printer, "write", ctx.file_path);
413            retval  = UNTAR_FAIL;
414            break;
415          }
416          ptr += 512;
417          sizeToGo -= n;
418        }
419        close(fd);
420      }
421
422    }
423  }
424
425  return retval;
426}
427
/*
 * Function: Untar_FromMemory
 *
 * Description:
 *
 *    This is a simple subroutine used to rip links, directories, and
 *    files out of a block of memory.  It calls Untar_FromMemory_Print()
 *    with a NULL printer.
 *
 *
 * Inputs:
 *
 *    void *  tar_buf    - Pointer to TAR buffer.
 *    size_t  size       - Length of TAR buffer.
 *
 *
 * Output:
 *
 *    int - The status returned by Untar_FromMemory_Print().
 *
 */
int
Untar_FromMemory(
  void   *tar_buf,
  size_t  size
)
{
  /* The third argument is a printer pointer, not a flag. */
  return Untar_FromMemory_Print(tar_buf, size, NULL);
}
458
459/*
460 * Function: Untar_FromFile
461 *
462 * Description:
463 *
464 *    This is a simple subroutine used to rip links, directories, and
465 *    files out of a TAR file.
466 *
467 * Inputs:
468 *
469 *    const char *tar_name   - TAR filename.
470 *
471 * Output:
472 *
473 *    int - UNTAR_SUCCESSFUL (0)    on successful completion.
474 *          UNTAR_INVALID_CHECKSUM  for an invalid header checksum.
475 *          UNTAR_INVALID_HEADER    for an invalid header.
476 */
477int
478Untar_FromFile_Print(
479  const char          *tar_name,
480  const rtems_printer *printer
481)
482{
483  int                  fd;
484  char                *bufr;
485  ssize_t              n;
486  int                  retval;
487  unsigned long        i;
488  char                 buf[UNTAR_FILE_NAME_SIZE];
489  Untar_HeaderContext  ctx;
490
491  retval = UNTAR_SUCCESSFUL;
492
493  if ((fd = open(tar_name, O_RDONLY)) < 0) {
494    return UNTAR_FAIL;
495  }
496
497  bufr = (char *)malloc(512);
498  if (bufr == NULL) {
499    close(fd);
500    return(UNTAR_FAIL);
501  }
502
503  ctx.file_path = buf;
504  ctx.file_name = buf;
505  ctx.printer = printer;
506
507  while (1) {
508    /* Read the header */
509    /* If the header read fails, we just consider it the end of the tarfile. */
510    if ((n = read(fd, bufr, 512)) != 512) {
511      break;
512    }
513
514    retval = Untar_ProcessHeader(&ctx, bufr);
515
516    if (retval != UNTAR_SUCCESSFUL)
517      break;
518
519    if (ctx.linkflag == REGTYPE) {
520      int out_fd;
521
522      /*
523       * Read out the data.  There are nblocks of data where nblocks
524       * is the size rounded to the nearest 512-byte boundary.
525       */
526
527      if ((out_fd = creat(ctx.file_path, ctx.mode)) == -1) {
528        (void) lseek(fd, SEEK_CUR, 512UL * ctx.nblocks);
529      } else {
530        for (i = 0; i < ctx.nblocks; i++) {
531          n = read(fd, bufr, 512);
532          n = MIN(n, ctx.file_size - (i * 512UL));
533          (void) write(out_fd, bufr, n);
534        }
535        close(out_fd);
536      }
537    }
538  }
539
540  free(bufr);
541  close(fd);
542
543  return retval;
544}
545
546
547void Untar_ChunkContext_Init(Untar_ChunkContext *context)
548{
549  context->base.file_path = context->buf;
550  context->base.file_name = context->buf;
551  context->state = UNTAR_CHUNK_HEADER;
552  context->done_bytes = 0;
553  context->out_fd = -1;
554}
555
556int Untar_FromChunk_Print(
557  Untar_ChunkContext *context,
558  void *chunk,
559  size_t chunk_size,
560  const rtems_printer* printer
561)
562{
563  char *buf;
564  size_t done;
565  size_t todo;
566  size_t remaining;
567  size_t consume;
568  int retval;
569
570  buf = chunk;
571  done = 0;
572  todo = chunk_size;
573
574  context->base.printer = printer;
575
576  while (todo > 0) {
577    switch (context->state) {
578      case UNTAR_CHUNK_HEADER:
579        remaining = 512 - context->done_bytes;
580        consume = MIN(remaining, todo);
581        memcpy(&context->header[context->done_bytes], &buf[done], consume);
582        context->done_bytes += consume;
583
584        if (context->done_bytes == 512) {
585          retval = Untar_ProcessHeader(
586            &context->base,
587            &context->header[0]
588          );
589
590          if (retval != UNTAR_SUCCESSFUL) {
591            context->state = UNTAR_CHUNK_ERROR;
592            return retval;
593          }
594
595          if (context->base.linkflag == REGTYPE) {
596            context->out_fd = creat(context->base.file_path,
597                                    context->base.mode);
598
599            if (context->out_fd >= 0) {
600              context->state = UNTAR_CHUNK_WRITE;
601              context->done_bytes = 0;
602            } else {
603              context->state = UNTAR_CHUNK_SKIP;
604              context->base.file_size = 512 * context->base.nblocks;
605              context->done_bytes = 0;
606            }
607          } else {
608              context->done_bytes = 0;
609          }
610        }
611
612        break;
613      case UNTAR_CHUNK_SKIP:
614        remaining = context->base.file_size - context->done_bytes;
615        consume = MIN(remaining, todo);
616        context->done_bytes += consume;
617
618        if (context->done_bytes == context->base.file_size) {
619          context->state = UNTAR_CHUNK_HEADER;
620          context->done_bytes = 0;
621        }
622
623        break;
624      case UNTAR_CHUNK_WRITE:
625        remaining = context->base.file_size - context->done_bytes;
626        consume = MIN(remaining, todo);
627        write(context->out_fd, &buf[done], consume);
628        context->done_bytes += consume;
629
630        if (context->done_bytes == context->base.file_size) {
631          close(context->out_fd);
632          context->out_fd = -1;
633          context->state = UNTAR_CHUNK_SKIP;
634          context->base.file_size = 512 * context->base.nblocks
635            - context->base.file_size;
636          context->done_bytes = 0;
637        }
638
639        break;
640      default:
641        return UNTAR_FAIL;
642    }
643
644    done += consume;
645    todo -= consume;
646  }
647
648  return UNTAR_SUCCESSFUL;
649}
650
651/*
652 * Function: Untar_FromFile
653 *
654 * Description:
655 *
656 *    This is a simple subroutine used to rip links, directories, and
657 *    files out of a TAR file.
658 *
659 * Inputs:
660 *
661 *    const char *tar_name   - TAR filename.
662 *
663 * Output:
664 *
665 *    int - UNTAR_SUCCESSFUL (0)    on successful completion.
666 *          UNTAR_INVALID_CHECKSUM  for an invalid header checksum.
667 *          UNTAR_INVALID_HEADER    for an invalid header.
668 */
669int
670Untar_FromFile(
671  const char *tar_name
672)
673{
674  return Untar_FromFile_Print(tar_name, NULL);
675}
676
/*
 * Calculate the checksum of a 512-byte TAR header.  Every byte counts with
 * its unsigned value, except for the eight bytes of the checksum field
 * (offsets 148 to 155) which count as blanks regardless of their content.
 */
static int
_rtems_tar_header_checksum(
  const char *bufr
)
{
  int total = 0;
  int pos;

  /* Everything in front of the checksum field. */
  for (pos = 0; pos < 148; ++pos) {
    total += 0xff & bufr[pos];
  }

  /* The checksum field itself, taken as eight blanks. */
  total += 8 * (0xff & ' ');

  /* Everything behind the checksum field. */
  for (pos = 156; pos < 512; ++pos) {
    total += 0xff & bufr[pos];
  }

  return total;
}
Note: See TracBrowser for help on using the repository browser.