source: rtems/cpukit/libmisc/untar/untar.c @ 89f8d9fc

5
Last change on this file since 89f8d9fc was d84e346b, checked in by Chris Johns <chrisj@…>, on 05/18/16 at 23:18:21

libmisc/untar: Support directory create and overwrites. Share the common code.

Support creating directories for files with a path depth greater than 1. Some
tar files can have files with a path depth greater than 1 and no directory
entry in the tar file to create a directory.

Support overwriting existing files and directories failing in a similar
way to tar on common hosts. If a file is replaced with a file delete the
file and create a new file. If a directory replaces a file remove the file
and create the directory. If a file replaces a directory remove the directory,
and if the directory is not empty and cannot be removed report an error. If a
directory already exists do nothing leaving the contents untouched.

The untar code now shares the common header parsing and initial processing
with the actual writes still separate. No changes to the IMFS have been made.

Updates #2415.
Closes #2207.

  • Property mode set to 100644
File size: 13.6 KB
Line 
1/**
2 * @file
3
4 * @brief Untar an Image
5 * @ingroup libmisc_untar_img Untar Image
6
7 * FIXME:
8 *   1. Symbolic links are not created.
9 *   2. Untar_FromMemory uses FILE *fp.
10 *   3. How to determine end of archive?
11
12 */
13
14/*
15 *  Written by: Jake Janovetz <janovetz@tempest.ece.uiuc.edu>
16 *
17 *  Copyright 2016 Chris Johns <chrisj@rtems.org>
18 *
19 *  The license and distribution terms for this file may be
20 *  found in the file LICENSE in this distribution or at
21 *  http://www.rtems.org/license/LICENSE.
22 */
23
24#ifdef HAVE_CONFIG_H
25#include "config.h"
26#endif
27
28#include <stdbool.h>
29#include <sys/param.h>
30#include <stdio.h>
31#include <string.h>
32#include <stdlib.h>
33#include <unistd.h>
34#include <errno.h>
35#include <sys/stat.h>
36#include <fcntl.h>
37#include <rtems/untar.h>
38#include <rtems/bspIo.h>
39
40
41/*
42 * TAR file format:
43
44 *   Offset   Length   Contents
45 *     0    100 bytes  File name ('\0' terminated, 99 maximum length)
46 *   100      8 bytes  File mode (in octal ascii)
47 *   108      8 bytes  User ID (in octal ascii)
48 *   116      8 bytes  Group ID (in octal ascii)
49 *   124     12 bytes  File size (s) (in octal ascii)
50 *   136     12 bytes  Modify time (in octal ascii)
51 *   148      8 bytes  Header checksum (in octal ascii)
52 *   156      1 bytes  Link flag
53 *   157    100 bytes  Linkname ('\0' terminated, 99 maximum length)
54 *   257      8 bytes  Magic PAX ("ustar\0" + 2 bytes padding)
55 *   257      8 bytes  Magic GNU tar ("ustar  \0")
56 *   265     32 bytes  User name ('\0' terminated, 31 maximum length)
57 *   297     32 bytes  Group name ('\0' terminated, 31 maximum length)
58 *   329      8 bytes  Major device ID (in octal ascii)
59 *   337      8 bytes  Minor device ID (in octal ascii)
60 *   345    155 bytes  Prefix
61 *   512   (s+p)bytes  File contents (s+p) := (((s) + 511) & ~511),
62 *                     round up to 512 bytes
63 *
64 *   Checksum:
65 *   int i, sum;
66 *   char* header = tar_header_pointer;
67 *   sum = 0;
68 *   for(i = 0; i < 512; i++)
69 *       sum += 0xFF & header[i];
70 */
71
72#define MAX_NAME_FIELD_SIZE      99
73
/*
 * Convert an octal ASCII number representation, as stored in a tar header
 * field, into an unsigned long.
 *
 * Exactly len characters of octascii are examined. Every character that is
 * not an octal digit ('0' to '7') is skipped, which covers the space and '\0'
 * padding found in header fields. No overflow detection is performed.
 *
 * warning: this code is referenced in the IMFS.
 */
unsigned long
_rtems_octal2ulong(
  const char *octascii,
  size_t len
)
{
  size_t        i;
  unsigned long num;

  num = 0;
  for (i=0; i < len; i++) {
    /*
     * '8' and '9' are decimal digits only; accumulating them as octal digits
     * would corrupt the result, so they are skipped like any other character.
     */
    if ((octascii[i] < '0') || (octascii[i] > '7')) {
      continue;
    }
    num  = num * 8 + ((unsigned long)(octascii[i] - '0'));
  }
  return(num);
}
98
99/*
100 * Common error message formatter.
101 */
102static void
103Print_Error(const rtems_printer *printer, const char* message, const char* path)
104{
105  rtems_printf(printer, "untar: %s: %s: (%d) %s\n",
106               message, path, errno, strerror(errno));
107}
108
/*
 * Classify the node found at a path in the file system.
 *
 * Returns -1 when stat() fails (errno is left as set by stat()), DIRTYPE when
 * the node is a directory and REGTYPE for every other kind of node.
 */
static int
Stat_Node(const char* path)
{
  struct stat info;

  if (stat(path, &info) == 0) {
    return S_ISDIR(info.st_mode) ? DIRTYPE : REGTYPE;
  }

  return -1;
}
122
123/*
124 * Make the directory path for a file if it does not exist.
125 */
126static int
127Make_Path(const rtems_printer *printer, const char* filename, bool end_is_dir)
128{
129  char* copy = strdup(filename);
130  char* path = copy;
131
132  /*
133   * Skip leading path separators.
134   */
135  while (*path == '/')
136    ++path;
137
138  /*
139   * Any path left?
140   */
141  if (*path != '\0') {
142    bool  path_end = false;
143    char* end = path;
144    int   r;
145
146    /*
147     * Split the path into directory components. Check the node and if a file
148     * and not the end of the path remove it and create a directory. If a
149     * directory and not the end of the path decend into the directory.
150     */
151    while (!path_end) {
152      while (*end != '\0' && *end != '/')
153        ++end;
154
155      /*
156       * Are we at the end of the path?
157       */
158      if (*end == '\0')
159        path_end = true;
160
161      /*
162       * Split the path.
163       */
164      *end = '\0';
165
166      /*
167       * Get the node's status, exists, error, directory or regular? Regular
168       * means not a directory.
169       */
170      r = Stat_Node(path);
171
172      /*
173       * If there are errors other than not existing we are finished.
174       */
175      if (r < 0 && errno != ENOENT) {
176        Print_Error(printer, "stat", path);
177        return -1;
178      }
179
180      /*
181       * If a file remove and create a directory if not the end.
182       */
183      if (r == REGTYPE) {
184        r = unlink(path);
185        if (r < 0) {
186          Print_Error(printer, "unlink", path);
187          free(copy);
188          return -1;
189        }
190        if (!path_end) {
191          r = mkdir(path, S_IRWXU | S_IRWXG | S_IRWXO);
192          if (r < 0) {
193            Print_Error(printer, "mkdir", path);
194            free(copy);
195            return -1;
196          }
197        }
198      }
199      else if (r < 0) {
200        /*
201         * Node does not exist which means the rest of the path will not exist.
202         */
203        while (!path_end) {
204          r = mkdir(path, S_IRWXU | S_IRWXG | S_IRWXO);
205          if (r < 0) {
206            Print_Error(printer, "mkdir", path);
207            free(copy);
208            return -1;
209          }
210          if (!path_end) {
211            *end = '/';
212            ++end;
213          }
214          while (*end != '\0' && *end != '/')
215            ++end;
216          if (*end == '\0')
217            path_end = true;
218        }
219      }
220      else if (path_end && r == DIRTYPE && !end_is_dir) {
221        /*
222         * We only handle a directory if at the end of the path and the end is
223         * a file. If we cannot remove the directory because it is not empty we
224         * raise an error. Otherwise this is a directory and we do nothing
225         * which lets us decend into it.
226         */
227        r = rmdir(path);
228        if (r < 0) {
229          Print_Error(printer, "rmdir", path);
230          free(copy);
231          return -1;
232        }
233      }
234
235      /*
236       * If not the end of the path put back the directory separator.
237       */
238      if (!path_end) {
239        *end = '/';
240        ++end;
241      }
242    }
243  }
244
245  free(copy);
246
247  return 0;
248}
249
250static int
251Untar_ProcessHeader(
252  const char          *bufr,
253  char                *fname,
254  unsigned long       *file_size,
255  unsigned long       *nblocks,
256  unsigned char       *linkflag,
257  const rtems_printer *printer
258)
259{
260  char           linkname[100];
261  int            sum;
262  int            hdr_chksum;
263  int            retval = UNTAR_SUCCESSFUL;
264
265  fname[0] = '\0';
266  *file_size = 0;
267  *nblocks = 0;
268  *linkflag = -1;
269
270  if (strncmp(&bufr[257], "ustar", 5)) {
271    return UNTAR_SUCCESSFUL;
272  }
273
274  /*
275   * Compute the TAR checksum and check with the value in the archive.  The
276   * checksum is computed over the entire header, but the checksum field is
277   * substituted with blanks.
278   */
279  hdr_chksum = _rtems_octal2ulong(&bufr[148], 8);
280  sum        = _rtems_tar_header_checksum(bufr);
281
282  if (sum != hdr_chksum) {
283    rtems_printf(printer, "untar: file header checksum error\n");
284    return UNTAR_INVALID_CHECKSUM;
285  }
286
287  strncpy(fname, bufr, MAX_NAME_FIELD_SIZE);
288  fname[MAX_NAME_FIELD_SIZE] = '\0';
289
290  *linkflag   = bufr[156];
291  *file_size = _rtems_octal2ulong(&bufr[124], 12);
292
293  /*
294   * We've decoded the header, now figure out what it contains and do something
295   * with it.
296   */
297  if (*linkflag == SYMTYPE) {
298    strncpy(linkname, &bufr[157], MAX_NAME_FIELD_SIZE);
299    linkname[MAX_NAME_FIELD_SIZE] = '\0';
300    rtems_printf(printer, "untar: symlink: %s -> %s\n", linkname, fname);
301    symlink(linkname, fname);
302  } else if (*linkflag == REGTYPE) {
303    rtems_printf(printer, "untar: file: %s (%i)\n", fname, (int) *file_size);
304    *nblocks = (((*file_size) + 511) & ~511) / 512;
305    if (Make_Path(printer, fname, false) < 0) {
306      retval  = UNTAR_FAIL;
307    }
308  } else if (*linkflag == DIRTYPE) {
309    int r;
310    rtems_printf(printer, "untar: dir: %s\n", fname);
311    if (Make_Path(printer, fname, true) < 0) {
312      retval  = UNTAR_FAIL;
313    }
314    r = mkdir(fname, S_IRWXU | S_IRWXG | S_IRWXO);
315    if (r < 0) {
316      if (errno == EEXIST) {
317        struct stat stat_buf;
318        if (stat(fname, &stat_buf) == 0) {
319          if (!S_ISDIR(stat_buf.st_mode)) {
320            r = unlink(fname);
321            if (r == 0) {
322              r = mkdir(fname, S_IRWXU | S_IRWXG | S_IRWXO);
323            }
324          }
325        }
326      }
327      if (r < 0) {
328        Print_Error(printer, "mkdir", fname);
329        retval = UNTAR_FAIL;
330      }
331    }
332  }
333
334  return retval;
335}
336
337/*
338 * Function: Untar_FromMemory
339 *
340 * Description:
341 *
342 *    This is a simple subroutine used to rip links, directories, and
343 *    files out of a block of memory.
344 *
345 *
346 * Inputs:
347 *
348 *    void *  tar_buf    - Pointer to TAR buffer.
349 *    size_t  size       - Length of TAR buffer.
350 *
351 *
352 * Output:
353 *
354 *    int - UNTAR_SUCCESSFUL (0)    on successful completion.
355 *          UNTAR_INVALID_CHECKSUM  for an invalid header checksum.
356 *          UNTAR_INVALID_HEADER    for an invalid header.
357 *
358 */
359int
360Untar_FromMemory_Print(
361  void                *tar_buf,
362  size_t               size,
363  const rtems_printer *printer
364)
365{
366  FILE           *fp;
367  const char     *tar_ptr = (const char *)tar_buf;
368  const char     *bufr;
369  char           fname[100];
370  int            retval = UNTAR_SUCCESSFUL;
371  unsigned long  ptr;
372  unsigned long  nblocks;
373  unsigned long  file_size;
374  unsigned char  linkflag;
375
376  rtems_printf(printer, "untar: memory at %p (%zu)\n", tar_buf, size);
377
378  ptr = 0;
379  while (true) {
380    if (ptr + 512 > size) {
381      retval = UNTAR_SUCCESSFUL;
382      break;
383    }
384
385    /* Read the header */
386    bufr = &tar_ptr[ptr];
387    ptr += 512;
388
389    retval = Untar_ProcessHeader(bufr, fname, &file_size, &nblocks, &linkflag, printer);
390
391    if (retval != UNTAR_SUCCESSFUL)
392      break;
393
394    if (linkflag == REGTYPE) {
395      if ((fp = fopen(fname, "w")) == NULL) {
396        Print_Error(printer, "open", fname);
397        ptr += 512 * nblocks;
398      } else {
399        unsigned long sizeToGo = file_size;
400        size_t        len;
401        size_t        i;
402        size_t        n;
403
404        /*
405         * Read out the data.  There are nblocks of data where nblocks is the
406         * file_size rounded to the nearest 512-byte boundary.
407         */
408        for (i = 0; i < nblocks; i++) {
409          len = ((sizeToGo < 512L) ? (sizeToGo) : (512L));
410          n = fwrite(&tar_ptr[ptr], 1, len, fp);
411          if (n != len) {
412            Print_Error(printer, "write", fname);
413            retval  = UNTAR_FAIL;
414            break;
415          }
416          ptr += 512;
417          sizeToGo -= n;
418        }
419        fclose(fp);
420      }
421
422    }
423  }
424
425  return retval;
426}
427
/*
 * Function: Untar_FromMemory
 *
 * Description:
 *
 *    This is a simple subroutine used to rip links, directories, and
 *    files out of a block of memory. It calls Untar_FromMemory_Print
 *    with no printer.
 *
 *
 * Inputs:
 *
 *    void *  tar_buf    - Pointer to TAR buffer.
 *    size_t  size       - Length of TAR buffer.
 *
 *
 * Output:
 *
 *    int - The value returned by Untar_FromMemory_Print.
 *
 */
int
Untar_FromMemory(
  void   *tar_buf,
  size_t  size
)
{
  /* The printer argument is a pointer, pass NULL and not a boolean. */
  return Untar_FromMemory_Print(tar_buf, size, NULL);
}
458
459/*
460 * Function: Untar_FromFile
461 *
462 * Description:
463 *
464 *    This is a simple subroutine used to rip links, directories, and
465 *    files out of a TAR file.
466 *
467 * Inputs:
468 *
469 *    const char *tar_name   - TAR filename.
470 *
471 * Output:
472 *
473 *    int - UNTAR_SUCCESSFUL (0)    on successful completion.
474 *          UNTAR_INVALID_CHECKSUM  for an invalid header checksum.
475 *          UNTAR_INVALID_HEADER    for an invalid header.
476 */
477int
478Untar_FromFile_Print(
479  const char          *tar_name,
480  const rtems_printer *printer
481)
482{
483  int            fd;
484  char           *bufr;
485  ssize_t        n;
486  char           fname[100];
487  int            retval;
488  unsigned long  i;
489  unsigned long  nblocks;
490  unsigned long  file_size;
491  unsigned char  linkflag;
492
493  retval = UNTAR_SUCCESSFUL;
494
495  if ((fd = open(tar_name, O_RDONLY)) < 0) {
496    return UNTAR_FAIL;
497  }
498
499  bufr = (char *)malloc(512);
500  if (bufr == NULL) {
501    close(fd);
502    return(UNTAR_FAIL);
503  }
504
505  while (1) {
506    /* Read the header */
507    /* If the header read fails, we just consider it the end of the tarfile. */
508    if ((n = read(fd, bufr, 512)) != 512) {
509      break;
510    }
511
512    retval = Untar_ProcessHeader(bufr, fname, &file_size, &nblocks, &linkflag, printer);
513
514    if (retval != UNTAR_SUCCESSFUL)
515      break;
516
517    if (linkflag == REGTYPE) {
518      int out_fd;
519
520      /*
521       * Read out the data.  There are nblocks of data where nblocks
522       * is the size rounded to the nearest 512-byte boundary.
523       */
524
525      if ((out_fd = creat(fname, 0644)) == -1) {
526        (void) lseek(fd, SEEK_CUR, 512UL * nblocks);
527      } else {
528        for (i = 0; i < nblocks; i++) {
529          n = read(fd, bufr, 512);
530          n = MIN(n, file_size - (i * 512UL));
531          (void) write(out_fd, bufr, n);
532        }
533        close(out_fd);
534      }
535    }
536  }
537
538  free(bufr);
539  close(fd);
540
541  return retval;
542}
543
544/*
545 * Function: Untar_FromFile
546 *
547 * Description:
548 *
549 *    This is a simple subroutine used to rip links, directories, and
550 *    files out of a TAR file.
551 *
552 * Inputs:
553 *
554 *    const char *tar_name   - TAR filename.
555 *
556 * Output:
557 *
558 *    int - UNTAR_SUCCESSFUL (0)    on successful completion.
559 *          UNTAR_INVALID_CHECKSUM  for an invalid header checksum.
560 *          UNTAR_INVALID_HEADER    for an invalid header.
561 */
562int
563Untar_FromFile(
564  const char *tar_name
565)
566{
567  return Untar_FromFile_Print(tar_name, NULL);
568}
569
/*
 * Compute the TAR checksum of a 512 byte header block.
 *
 * Every byte of the header is summed as an unsigned value, except for the
 * 8 byte checksum field at offsets 148 to 155 which is counted as if it held
 * blanks.
 */
int
_rtems_tar_header_checksum(
  const char *bufr
)
{
  const unsigned char *bytes = (const unsigned char *) bufr;
  int                  total;
  int                  pos;

  /* The checksum field itself contributes eight blanks. */
  total = 8 * (0xff & ' ');

  /* Bytes in front of the checksum field. */
  for (pos = 0; pos < 148; ++pos) {
    total += bytes[pos];
  }

  /* Bytes behind the checksum field. */
  for (pos = 156; pos < 512; ++pos) {
    total += bytes[pos];
  }

  return total;
}
Note: See TracBrowser for help on using the repository browser.