Changeset d42bd2f in rtems for cpukit/zlib


Ignore:
Timestamp:
Apr 25, 2008, 11:14:33 PM (12 years ago)
Author:
Ralf Corsepius <ralf.corsepius@…>
Branches:
4.10, 4.11, master
Children:
64bad829
Parents:
21a750bb
Message:

Import from zlib-1.2.4

File:
1 edited

Legend:

Unmodified
Added
Removed
  • cpukit/zlib/examples/gzlog.c

    r21a750bb rd42bd2f  
    11/*
    22 * gzlog.c
    3  * Copyright (C) 2004 Mark Adler
     3 * Copyright (C) 2004, 2008 Mark Adler, all rights reserved
    44 * For conditions of distribution and use, see copyright notice in gzlog.h
    5  * version 1.0, 26 Nov 2004
    6  *
     5 * version 2.0, 25 Apr 2008
    76 */
    87
    9 #include <string.h>             /* memcmp() */
    10 #include <stdlib.h>             /* malloc(), free(), NULL */
    11 #include <sys/types.h>          /* size_t, off_t */
    12 #include <unistd.h>             /* read(), close(), sleep(), ftruncate(), */
    13                                 /* lseek() */
    14 #include <fcntl.h>              /* open() */
    15 #include <sys/file.h>           /* flock() */
    16 #include "zlib.h"               /* deflateInit2(), deflate(), deflateEnd() */
    17 
    18 #include "gzlog.h"              /* interface */
     8/*
     9   gzlog provides a mechanism for frequently appending short strings to a gzip
     10   file that is efficient both in execution time and compression ratio.  The
     11   strategy is to write the short strings in an uncompressed form to the end of
     12   the gzip file, only compressing when the amount of uncompressed data has
     13   reached a given threshold.
     14
     15   gzlog also provides protection against interruptions in the process due to
     16   system crashes.  The status of the operation is recorded in an extra field
     17   in the gzip file, and is only updated once the gzip file is brought to a
     18   valid state.  The last data to be appended or compressed is saved in an
     19   auxiliary file, so that if the operation is interrupted, it can be completed
     20   the next time an append operation is attempted.
     21
     22   gzlog maintains another auxiliary file with the last 32K of data from the
     23   compressed portion, which is preloaded for the compression of the subsequent
     24   data.  This minimizes the impact to the compression ratio of appending.
     25 */
     26
     27/*
     28   Operations Concept:
     29
     30   Files (log name "foo"):
     31   foo.gz -- gzip file with the complete log
     32   foo.add -- last message to append or last data to compress
     33   foo.dict -- dictionary of the last 32K of data for next compression
     34   foo.temp -- temporary dictionary file for compression after this one
     35   foo.lock -- lock file for reading and writing the other files
     36   foo.repairs -- log file for log file recovery operations (not compressed)
     37
     38   gzip file structure:
     39   - fixed-length (no file name) header with extra field (see below)
     40   - compressed data ending initially with empty stored block
     41   - uncompressed data filling out originally empty stored block and
     42     subsequent stored blocks as needed (16K max each)
     43   - gzip trailer
     44   - no junk at end (no other gzip streams)
     45
     46   When appending data, the information in the first three items above plus the
     47   foo.add file are sufficient to recover an interrupted append operation.  The
     48   extra field has the necessary information to restore the start of the last
     49   stored block and determine where to append the data in the foo.add file, as
     50   well as the crc and length of the gzip data before the append operation.
     51
     52   The foo.add file is created before the gzip file is marked for append, and
     53   deleted after the gzip file is marked as complete.  So if the append
     54   operation is interrupted, the data to add will still be there.  If due to
     55   some external force, the foo.add file gets deleted between when the append
     56   operation was interrupted and when recovery is attempted, the gzip file will
     57   still be restored, but without the appended data.
     58
     59   When compressing data, the information in the first two items above plus the
     60   foo.add file are sufficient to recover an interrupted compress operation.
     61   The extra field has the necessary information to find the end of the
     62   compressed data, and contains both the crc and length of just the compressed
     63   data and of the complete set of data including the contents of the foo.add
     64   file.
     65
     66   Again, the foo.add file is maintained during the compress operation in case
     67   of an interruption.  If in the unlikely event the foo.add file with the data
     68   to be compressed is missing due to some external force, a gzip file with
     69   just the previous compressed data will be reconstructed.  In this case, all
     70   of the data that was to be compressed is lost (approximately one megabyte).
     71   This will not occur if all that happened was an interruption of the compress
     72   operation.
     73
     74   The third state that is marked is the replacement of the old dictionary with
     75   the new dictionary after a compress operation.  Once compression is
     76   complete, the gzip file is marked as being in the replace state.  This
     77   completes the gzip file, so an interrupt after being so marked does not
     78   result in recompression.  Then the dictionary file is replaced, and the gzip
     79   file is marked as completed.  This state prevents the possibility of
     80   restarting compression with the wrong dictionary file.
     81
     82   All three operations are wrapped by a lock/unlock procedure.  In order to
     83   gain exclusive access to the log files, first a foo.lock file must be
     84   exclusively created.  When all operations are complete, the lock is
     85   released by deleting the foo.lock file.  If when attempting to create the
     86   lock file, it already exists and the modify time of the lock file is more
     87   than five minutes old (set by the PATIENCE define below), then the old
     88   lock file is considered stale and deleted, and the exclusive creation of
     89   the lock file is retried.  To assure that there are no false assessments
     90   of the staleness of the lock file, the operations periodically touch the
     91   lock file to update the modified date.
     92
     93   Following is the definition of the extra field with all of the information
     94   required to enable the above append and compress operations and their
     95   recovery if interrupted.  Multi-byte values are stored little endian
     96   (consistent with the gzip format).  File pointers are eight bytes long.
     97   The crc's and lengths for the gzip trailer are four bytes long.  (Note that
     98   the length at the end of a gzip file is used for error checking only, and
     99   for large files is actually the length modulo 2^32.)  The stored block
     100   length is two bytes long.  The gzip extra field two-byte identification is
     101   "ap" for append.  It is assumed that writing the extra field to the file is
     102   an "atomic" operation.  That is, either all of the extra field is written
     103   to the file, or none of it is, if the operation is interrupted right at the
     104   point of updating the extra field.  This is a reasonable assumption, since
     105   the extra field is within the first 52 bytes of the file, which is smaller
     106   than any expected block size for a mass storage device (usually 512 bytes or
     107   larger).
     108
     109   Extra field (35 bytes):
     110   - Pointer to first stored block length -- this points to the two-byte length
     111     of the first stored block, which is followed by the two-byte, one's
     112     complement of that length.  The stored block length is preceded by the
     113     three-bit header of the stored block, which is the actual start of the
     114     stored block in the deflate format.  See the bit offset field below.
     115   - Pointer to the last stored block length.  This is the same as above, but
     116     for the last stored block of the uncompressed data in the gzip file.
     117     Initially this is the same as the first stored block length pointer.
     118     When the stored block gets to 16K (see the MAX_STORE define), then a new
     119     stored block as added, at which point the last stored block length pointer
     120     is different from the first stored block length pointer.  When they are
     121     different, the first bit of the last stored block header is eight bits, or
     122     one byte back from the block length.
     123   - Compressed data crc and length.  This is the crc and length of the data
     124     that is in the compressed portion of the deflate stream.  These are used
     125     only in the event that the foo.add file containing the data to compress is
     126     lost after a compress operation is interrupted.
     127   - Total data crc and length.  This is the crc and length of all of the data
     128     stored in the gzip file, compressed and uncompressed.  It is used to
     129     reconstruct the gzip trailer when compressing, as well as when recovering
     130     interrupted operations.
     131   - Final stored block length.  This is used to quickly find where to append,
     132     and allows the restoration of the original final stored block state when
     133     an append operation is interrupted.
     134   - First stored block start as the number of bits back from the final stored
     135     block first length byte.  This value is in the range of 3..10, and is
     136     stored as the low three bits of the final byte of the extra field after
     137     subtracting three (0..7).  This allows the last-block bit of the stored
     138     block header to be updated when a new stored block is added, for the case
     139     when the first stored block and the last stored block are the same.  (When
     140     they are different, the numbers of bits back is known to be eight.)  This
     141     also allows for new compressed data to be appended to the old compressed
     142     data in the compress operation, overwriting the previous first stored
     143     block, or for the compressed data to be terminated and a valid gzip file
     144     reconstructed on the off chance that a compression operation was
     145     interrupted and the data to compress in the foo.add file was deleted.
     146   - The operation in process.  This is the next two bits in the last byte (the
     147     bits under the mask 0x18).  The are interpreted as 0: nothing in process,
     148     1: append in process, 2: compress in process, 3: replace in process.
     149   - The top three bits of the last byte in the extra field are reserved and
     150     are currently set to zero.
     151
     152   Main procedure:
     153   - Exclusively create the foo.lock file using the O_CREAT and O_EXCL modes of
     154     the system open() call.  If the modify time of an existing lock file is
     155     more than PATIENCE seconds old, then the lock file is deleted and the
     156     exclusive create is retried.
     157   - Load the extra field from the foo.gz file, and see if an operation was in
     158     progress but not completed.  If so, apply the recovery procedure below.
     159   - Perform the append procedure with the provided data.
     160   - If the uncompressed data in the foo.gz file is 1MB or more, apply the
     161     compress procedure.
     162   - Delete the foo.lock file.
     163
     164   Append procedure:
     165   - Put what to append in the foo.add file so that the operation can be
     166     restarted if this procedure is interrupted.
     167   - Mark the foo.gz extra field with the append operation in progress.
     168   + Restore the original last-block bit and stored block length of the last
     169     stored block from the information in the extra field, in case a previous
     170     append operation was interrupted.
     171   - Append the provided data to the last stored block, creating new stored
     172     blocks as needed and updating the stored blocks last-block bits and
     173     lengths.
     174   - Update the crc and length with the new data, and write the gzip trailer.
     175   - Write over the extra field (with a single write operation) with the new
     176     pointers, lengths, and crc's, and mark the gzip file as not in process.
     177     Though there is still a foo.add file, it will be ignored since nothing
     178     is in process.  If a foo.add file is leftover from a previously
     179     completed operation, it is truncated when writing new data to it.
     180   - Delete the foo.add file.
     181
     182   Compress and replace procedures:
     183   - Read all of the uncompressed data in the stored blocks in foo.gz and write
     184     it to foo.add.  Also write foo.temp with the last 32K of that data to
     185     provide a dictionary for the next invocation of this procedure.
     186   - Rewrite the extra field marking foo.gz with a compression in process.
     187   * If there is no data provided to compress (due to a missing foo.add file
     188     when recovering), reconstruct and truncate the foo.gz file to contain
     189     only the previous compressed data and proceed to the step after the next
     190     one.  Otherwise ...
     191   - Compress the data with the dictionary in foo.dict, and write to the
     192     foo.gz file starting at the bit immediately following the last previously
     193     compressed block.  If there is no foo.dict, proceed anyway with the
     194     compression at slightly reduced efficiency.  (For the foo.dict file to be
     195     missing requires some external failure beyond simply the interruption of
     196     a compress operation.)  During this process, the foo.lock file is
     197     periodically touched to assure that that file is not considered stale by
     198     another process before we're done.  The deflation is terminated with a
     199     non-last empty static block (10 bits long), that is then located and
     200     written over by a last-bit-set empty stored block.
     201   - Append the crc and length of the data in the gzip file (previously
     202     calculated during the append operations).
     203   - Write over the extra field with the updated stored block offsets, bits
     204     back, crc's, and lengths, and mark foo.gz as in process for a replacement
     205     of the dictionary.
     206   @ Delete the foo.add file.
     207   - Replace foo.dict with foo.temp.
     208   - Write over the extra field, marking foo.gz as complete.
     209
     210   Recovery procedure:
     211   - If not a replace recovery, read in the foo.add file, and provide that data
     212     to the appropriate recovery below.  If there is no foo.add file, provide
     213     a zero data length to the recovery.  In that case, the append recovery
     214     restores the foo.gz to the previous compressed + uncompressed data state.
     215     For the the compress recovery, a missing foo.add file results in foo.gz
     216     being restored to the previous compressed-only data state.
     217   - Append recovery:
     218     - Pick up append at + step above
     219   - Compress recovery:
     220     - Pick up compress at * step above
     221   - Replace recovery:
     222     - Pick up compress at @ step above
     223   - Log the repair with a date stamp in foo.repairs
     224 */
     225
     226#include <sys/types.h>
     227#include <stdio.h>      /* rename, fopen, fprintf, fclose */
     228#include <stdlib.h>     /* malloc, free */
     229#include <string.h>     /* strlen, strrchr, strcpy, strncpy, strcmp */
     230#include <fcntl.h>      /* open */
     231#include <unistd.h>     /* lseek, read, write, close, unlink, sleep, */
     232                        /* ftruncate, fsync */
     233#include <errno.h>      /* errno */
     234#include <time.h>       /* time, ctime */
     235#include <sys/stat.h>   /* stat */
     236#include <sys/time.h>   /* utimes */
     237#include "zlib.h"       /* crc32 */
     238
     239#include "gzlog.h"      /* header for external access */
     240
    19241#define local static
    20 
    21 /* log object structure */
    22 typedef struct {
    23     int id;                 /* object identifier */
    24     int fd;                 /* log file descriptor */
    25     off_t extra;            /* offset of extra "ap" subfield */
    26     off_t mark_off;         /* offset of marked data */
    27     off_t last_off;         /* offset of last block */
    28     unsigned long crc;      /* uncompressed crc */
    29     unsigned long len;      /* uncompressed length (modulo 2^32) */
    30     unsigned stored;        /* length of current stored block */
    31 } gz_log;
    32 
    33 #define GZLOGID 19334       /* gz_log object identifier */
    34 
    35 #define LOCK_RETRY 1            /* retry lock once a second */
    36 #define LOCK_PATIENCE 1200      /* try about twenty minutes before forcing */
    37 
    38 /* acquire a lock on a file */
    39 local int lock(int fd)
    40 {
    41     int patience;
    42 
    43     /* try to lock every LOCK_RETRY seconds for LOCK_PATIENCE seconds */
    44     patience = LOCK_PATIENCE;
    45     do {
    46         if (flock(fd, LOCK_EX + LOCK_NB) == 0)
    47             return 0;
    48         (void)sleep(LOCK_RETRY);
    49         patience -= LOCK_RETRY;
    50     } while (patience > 0);
    51 
    52     /* we've run out of patience -- give up */
    53     return -1;
    54 }
    55 
    56 /* release lock */
    57 local void unlock(int fd)
    58 {
    59     (void)flock(fd, LOCK_UN);
    60 }
    61 
    62 /* release a log object */
    63 local void log_clean(gz_log *log)
    64 {
    65     unlock(log->fd);
    66     (void)close(log->fd);
    67     free(log);
    68 }
    69 
    70 /* read an unsigned long from a byte buffer little-endian */
    71 local unsigned long make_ulg(unsigned char *buf)
    72 {
    73     int n;
    74     unsigned long val;
    75 
    76     val = (unsigned long)(*buf++);
    77     for (n = 8; n < 32; n += 8)
    78         val += (unsigned long)(*buf++) << n;
    79     return val;
    80 }
    81 
    82 /* read an off_t from a byte buffer little-endian */
    83 local off_t make_off(unsigned char *buf)
    84 {
    85     int n;
    86     off_t val;
    87 
    88     val = (off_t)(*buf++);
    89     for (n = 8; n < 64; n += 8)
    90         val += (off_t)(*buf++) << n;
    91     return val;
    92 }
    93 
    94 /* write an unsigned long little-endian to byte buffer */
    95 local void dice_ulg(unsigned long val, unsigned char *buf)
    96 {
    97     int n;
    98 
    99     for (n = 0; n < 4; n++) {
    100         *buf++ = val & 0xff;
    101         val >>= 8;
    102     }
    103 }
    104 
    105 /* write an off_t little-endian to byte buffer */
    106 local void dice_off(off_t val, unsigned char *buf)
    107 {
    108     int n;
    109 
    110     for (n = 0; n < 8; n++) {
    111         *buf++ = val & 0xff;
    112         val >>= 8;
    113     }
    114 }
    115 
    116 /* initial, empty gzip file for appending */
    117 local char empty_gz[] = {
     242typedef unsigned int uint;
     243typedef unsigned long ulong;
     244
     245/* Macro for debugging to deterministically force recovery operations */
     246#ifdef DEBUG
     247    #include <setjmp.h>         /* longjmp */
     248    jmp_buf gzlog_jump;         /* where to go back to */
     249    int gzlog_bail = 0;         /* which point to bail at (1..8) */
     250    int gzlog_count = -1;       /* number of times through to wait */
     251#   define BAIL(n) do { if (n == gzlog_bail && gzlog_count-- == 0) \
     252                            longjmp(gzlog_jump, gzlog_bail); } while (0)
     253#else
     254#   define BAIL(n)
     255#endif
     256
     257/* how old the lock file can be in seconds before considering it stale */
     258#define PATIENCE 300
     259
     260/* maximum stored block size in Kbytes -- must be in 1..63 */
     261#define MAX_STORE 16
     262
     263/* number of stored Kbytes to trigger compression (must be >= 32 to allow
     264   dictionary construction, and <= 204 * MAX_STORE, in order for >> 10 to
     265   discard the stored block headers contribution of five bytes each) */
     266#define TRIGGER 1024
     267
     268/* size of a deflate dictionary (this cannot be changed) */
     269#define DICT 32768U
     270
     271/* values for the operation (2 bits) */
     272#define NO_OP 0
     273#define APPEND_OP 1
     274#define COMPRESS_OP 2
     275#define REPLACE_OP 3
     276
     277/* macros to extract little-endian integers from an unsigned byte buffer */
     278#define PULL2(p) ((p)[0]+((uint)((p)[1])<<8))
     279#define PULL4(p) (PULL2(p)+((ulong)PULL2(p+2)<<16))
     280#define PULL8(p) (PULL4(p)+((off_t)PULL4(p+4)<<32))
     281
     282/* macros to store integers into a byte buffer in little-endian order */
     283#define PUT2(p,a) do {(p)[0]=a;(p)[1]=(a)>>8;} while(0)
     284#define PUT4(p,a) do {PUT2(p,a);PUT2(p+2,a>>16);} while(0)
     285#define PUT8(p,a) do {PUT4(p,a);PUT4(p+4,a>>32);} while(0)
     286
     287/* internal structure for log information */
     288#define LOGID "\106\035\172"    /* should be three non-zero characters */
     289struct log {
     290    char id[4];     /* contains LOGID to detect inadvertent overwrites */
     291    int fd;         /* file descriptor for .gz file, opened read/write */
     292    char *path;     /* allocated path, e.g. "/var/log/foo" or "foo" */
     293    char *end;      /* end of path, for appending suffices such as ".gz" */
     294    off_t first;    /* offset of first stored block first length byte */
     295    int back;       /* location of first block id in bits back from first */
     296    uint stored;    /* bytes currently in last stored block */
     297    off_t last;     /* offset of last stored block first length byte */
     298    ulong ccrc;     /* crc of compressed data */
     299    ulong clen;     /* length (modulo 2^32) of compressed data */
     300    ulong tcrc;     /* crc of total data */
     301    ulong tlen;     /* length (modulo 2^32) of total data */
     302    time_t lock;    /* last modify time of our lock file */
     303};
     304
     305/* gzip header for gzlog */
     306local unsigned char log_gzhead[] = {
    118307    0x1f, 0x8b,                 /* magic gzip id */
    119308    8,                          /* compression method is deflate */
    120     4,                          /* there is an extra field */
     309    4,                          /* there is an extra field (no file name) */
    121310    0, 0, 0, 0,                 /* no modification time provided */
    122     0, 0xff,                    /* no extra flags, no OS */
    123     20, 0, 'a', 'p', 16, 0,     /* extra field with "ap" subfield */
    124     32, 0, 0, 0, 0, 0, 0, 0,    /* offset of uncompressed data */
    125     32, 0, 0, 0, 0, 0, 0, 0,    /* offset of last block */
     311    0, 0xff,                    /* no extra flags, no OS specified */
     312    39, 0, 'a', 'p', 35, 0      /* extra field with "ap" subfield */
     313                                /* 35 is EXTRA, 39 is EXTRA + 4 */
     314};
     315
     316#define HEAD sizeof(log_gzhead)     /* should be 16 */
     317
     318/* initial gzip extra field content (52 == HEAD + EXTRA + 1) */
     319local unsigned char log_gzext[] = {
     320    52, 0, 0, 0, 0, 0, 0, 0,    /* offset of first stored block length */
     321    52, 0, 0, 0, 0, 0, 0, 0,    /* offset of last stored block length */
     322    0, 0, 0, 0, 0, 0, 0, 0,     /* compressed data crc and length */
     323    0, 0, 0, 0, 0, 0, 0, 0,     /* total data crc and length */
     324    0, 0,                       /* final stored block data length */
     325    5                           /* op is NO_OP, last bit 8 bits back */
     326};
     327
     328#define EXTRA sizeof(log_gzext)     /* should be 35 */
     329
     330/* initial gzip data and trailer */
     331local unsigned char log_gzbody[] = {
    126332    1, 0, 0, 0xff, 0xff,        /* empty stored block (last) */
    127333    0, 0, 0, 0,                 /* crc */
     
    129335};
    130336
    131 /* initialize a log object with locking */
    132 void *gzlog_open(char *path)
    133 {
    134     unsigned xlen;
    135     unsigned char temp[20];
    136     unsigned sub_len;
    137     int good;
    138     gz_log *log;
    139 
    140     /* allocate log structure */
    141     log = malloc(sizeof(gz_log));
     337#define BODY sizeof(log_gzbody)
     338
     339/* Exclusively create foo.lock in order to negotiate exclusive access to the
     340   foo.* files.  If the modify time of an existing lock file is greater than
     341   PATIENCE seconds in the past, then consider the lock file to have been
     342   abandoned, delete it, and try the exclusive create again.  Save the lock
     343   file modify time for verification of ownership.  Return 0 on success, or -1
     344   on failure, usually due to an access restriction or invalid path.  Note that
     345   if stat() or unlink() fails, it may be due to another process noticing the
     346   abandoned lock file a smidge sooner and deleting it, so those are not
     347   flagged as an error. */
     348local int log_lock(struct log *log)
     349{
     350    int fd;
     351    struct stat st;
     352
     353    strcpy(log->end, ".lock");
     354    while ((fd = open(log->path, O_CREAT | O_EXCL, 0644)) < 0) {
     355        if (errno != EEXIST)
     356            return -1;
     357        if (stat(log->path, &st) == 0 && time(NULL) - st.st_mtime > PATIENCE) {
     358            unlink(log->path);
     359            continue;
     360        }
     361        sleep(2);       /* relinquish the CPU for two seconds while waiting */
     362    }
     363    close(fd);
     364    if (stat(log->path, &st) == 0)
     365        log->lock = st.st_mtime;
     366    return 0;
     367}
     368
     369/* Update the modify time of the lock file to now, in order to prevent another
     370   task from thinking that the lock is stale.  Save the lock file modify time
     371   for verification of ownership. */
     372local void log_touch(struct log *log)
     373{
     374    struct stat st;
     375
     376    strcpy(log->end, ".lock");
     377    utimes(log->path, NULL);
     378    if (stat(log->path, &st) == 0)
     379        log->lock = st.st_mtime;
     380}
     381
     382/* Check the log file modify time against what is expected.  Return true if
     383   this is not our lock.  If it is our lock, touch it to keep it. */
     384local int log_check(struct log *log)
     385{
     386    struct stat st;
     387
     388    strcpy(log->end, ".lock");
     389    if (stat(log->path, &st) || st.st_mtime != log->lock)
     390        return 1;
     391    log_touch(log);
     392    return 0;
     393}
     394
     395/* Unlock a previously acquired lock, but only if it's ours. */
     396local void log_unlock(struct log *log)
     397{
     398    if (log_check(log))
     399        return;
     400    strcpy(log->end, ".lock");
     401    unlink(log->path);
     402    log->lock = 0;
     403}
     404
     405/* Check the gzip header and read in the extra field, filling in the values in
     406   the log structure.  Return op on success or -1 if the gzip header was not as
     407   expected.  op is the current operation in progress last written to the extra
     408   field.  This assumes that the gzip file has already been opened, with the
     409   file descriptor log->fd. */
     410local int log_head(struct log *log)
     411{
     412    int op;
     413    unsigned char buf[HEAD + EXTRA];
     414
     415    if (lseek(log->fd, 0, SEEK_SET) < 0 ||
     416        read(log->fd, buf, HEAD + EXTRA) != HEAD + EXTRA ||
     417        memcmp(buf, log_gzhead, HEAD)) {
     418        return -1;
     419    }
     420    log->first = PULL8(buf + HEAD);
     421    log->last = PULL8(buf + HEAD + 8);
     422    log->ccrc = PULL4(buf + HEAD + 16);
     423    log->clen = PULL4(buf + HEAD + 20);
     424    log->tcrc = PULL4(buf + HEAD + 24);
     425    log->tlen = PULL4(buf + HEAD + 28);
     426    log->stored = PULL2(buf + HEAD + 32);
     427    log->back = 3 + (buf[HEAD + 34] & 7);
     428    op = (buf[HEAD + 34] >> 3) & 3;
     429    return op;
     430}
     431
     432/* Write over the extra field contents, marking the operation as op.  Use fsync
     433   to assure that the device is written to, and in the requested order.  This
     434   operation, and only this operation, is assumed to be atomic in order to
     435   assure that the log is recoverable in the event of an interruption at any
     436   point in the process.  Return -1 if the write to foo.gz failed. */
     437local int log_mark(struct log *log, int op)
     438{
     439    int ret;
     440    unsigned char ext[EXTRA];
     441
     442    PUT8(ext, log->first);
     443    PUT8(ext + 8, log->last);
     444    PUT4(ext + 16, log->ccrc);
     445    PUT4(ext + 20, log->clen);
     446    PUT4(ext + 24, log->tcrc);
     447    PUT4(ext + 28, log->tlen);
     448    PUT2(ext + 32, log->stored);
     449    ext[34] = log->back - 3 + (op << 3);
     450    fsync(log->fd);
     451    ret = lseek(log->fd, HEAD, SEEK_SET) < 0 ||
     452          write(log->fd, ext, EXTRA) != EXTRA ? -1 : 0;
     453    fsync(log->fd);
     454    return ret;
     455}
     456
     457/* Rewrite the last block header bits and subsequent zero bits to get to a byte
     458   boundary, setting the last block bit if last is true, and then write the
     459   remainder of the stored block header (length and one's complement).  Leave
     460   the file pointer after the end of the last stored block data.  Return -1 if
     461   there is a read or write failure on the foo.gz file */
     462local int log_last(struct log *log, int last)
     463{
     464    int back, len, mask;
     465    unsigned char buf[6];
     466
     467    /* determine the locations of the bytes and bits to modify */
     468    back = log->last == log->first ? log->back : 8;
     469    len = back > 8 ? 2 : 1;                 /* bytes back from log->last */
     470    mask = 0x80 >> ((back - 1) & 7);        /* mask for block last-bit */
     471
     472    /* get the byte to modify (one or two back) into buf[0] -- don't need to
     473       read the byte if the last-bit is eight bits back, since in that case
     474       the entire byte will be modified */
     475    buf[0] = 0;
     476    if (back != 8 && (lseek(log->fd, log->last - len, SEEK_SET) < 0 ||
     477                      read(log->fd, buf, 1) != 1))
     478        return -1;
     479
     480    /* change the last-bit of the last stored block as requested -- note
     481       that all bits above the last-bit are set to zero, per the type bits
     482       of a stored block being 00 and per the convention that the bits to
     483       bring the stream to a byte boundary are also zeros */
     484    buf[1] = 0;
     485    buf[2 - len] = (*buf & (mask - 1)) + (last ? mask : 0);
     486
     487    /* write the modified stored block header and lengths, move the file
     488       pointer to after the last stored block data */
     489    PUT2(buf + 2, log->stored);
     490    PUT2(buf + 4, log->stored ^ 0xffff);
     491    return lseek(log->fd, log->last - len, SEEK_SET) < 0 ||
     492           write(log->fd, buf + 2 - len, len + 4) != len + 4 ||
     493           lseek(log->fd, log->stored, SEEK_CUR) < 0 ? -1 : 0;
     494}
     495
     496/* Append len bytes from data to the locked and open log file.  len may be zero
     497   if recovering and no .add file was found.  In that case, the previous state
     498   of the foo.gz file is restored.  The data is appended uncompressed in
     499   deflate stored blocks.  Return -1 if there was an error reading or writing
     500   the foo.gz file. */
     501local int log_append(struct log *log, unsigned char *data, size_t len)
     502{
     503    uint put;
     504    off_t end;
     505    unsigned char buf[8];
     506
     507    /* set the last block last-bit and length, in case recovering an
     508       interrupted append, then position the file pointer to append to the
     509       block */
     510    if (log_last(log, 1))
     511        return -1;
     512
     513    /* append, adding stored blocks and updating the offset of the last stored
     514       block as needed, and update the total crc and length */
     515    while (len) {
     516        /* append as much as we can to the last block */
     517        put = (MAX_STORE << 10) - log->stored;
     518        if (put > len)
     519            put = (uint)len;
     520        if (put) {
     521            if (write(log->fd, data, put) != put)
     522                return -1;
     523            BAIL(1);
     524            log->tcrc = crc32(log->tcrc, data, put);
     525            log->tlen += put;
     526            log->stored += put;
     527            data += put;
     528            len -= put;
     529        }
     530
     531        /* if we need to, add a new empty stored block */
     532        if (len) {
     533            /* mark current block as not last */
     534            if (log_last(log, 0))
     535                return -1;
     536
     537            /* point to new, empty stored block */
     538            log->last += 4 + log->stored + 1;
     539            log->stored = 0;
     540        }
     541
     542        /* mark last block as last, update its length */
     543        if (log_last(log, 1))
     544            return -1;
     545        BAIL(2);
     546    }
     547
     548    /* write the new crc and length trailer, and truncate just in case (could
     549       be recovering from partial append with a missing foo.add file) */
     550    PUT4(buf, log->tcrc);
     551    PUT4(buf + 4, log->tlen);
     552    if (write(log->fd, buf, 8) != 8 ||
     553        (end = lseek(log->fd, 0, SEEK_CUR)) < 0 || ftruncate(log->fd, end))
     554        return -1;
     555
     556    /* write the extra field, marking the log file as done, delete .add file */
     557    if (log_mark(log, NO_OP))
     558        return -1;
     559    strcpy(log->end, ".add");
     560    unlink(log->path);          /* ignore error, since may not exist */
     561    return 0;
     562}
     563
     564/* Replace the foo.dict file with the foo.temp file.  Also delete the foo.add
     565   file, since the compress operation may have been interrupted before that was
     566   done.  Returns 1 if memory could not be allocated, or -1 if reading or
     567   writing foo.gz fails, or if the rename fails for some reason other than
     568   foo.temp not existing.  foo.temp not existing is a permitted error, since
     569   the replace operation may have been interrupted after the rename is done,
     570   but before foo.gz is marked as complete. */
     571local int log_replace(struct log *log)
     572{
     573    int ret;
     574    char *dest;
     575
     576    /* delete foo.add file */
     577    strcpy(log->end, ".add");
     578    unlink(log->path);         /* ignore error, since may not exist */
     579    BAIL(3);
     580
     581    /* rename foo.name to foo.dict, replacing foo.dict if it exists */
     582    strcpy(log->end, ".dict");
     583    dest = malloc(strlen(log->path) + 1);
     584    if (dest == NULL)
     585        return -2;
     586    strcpy(dest, log->path);
     587    strcpy(log->end, ".temp");
     588    ret = rename(log->path, dest);
     589    free(dest);
     590    if (ret && errno != ENOENT)
     591        return -1;
     592    BAIL(4);
     593
     594    /* mark the foo.gz file as done */
     595    return log_mark(log, NO_OP);
     596}
     597
     598/* Compress the len bytes at data and append the compressed data to the
     599   foo.gz deflate data immediately after the previous compressed data.  This
     600   overwrites the previous uncompressed data, which was stored in foo.add
     601   and is the data provided in data[0..len-1].  If this operation is
     602   interrupted, it picks up at the start of this routine, with the foo.add
     603   file read in again.  If there is no data to compress (len == 0), then we
     604   simply terminate the foo.gz file after the previously compressed data,
     605   appending a final empty stored block and the gzip trailer.  Return -1 if
     606   reading or writing the log.gz file failed, or -2 if there was a memory
     607   allocation failure. */
     608local int log_compress(struct log *log, unsigned char *data, size_t len)
     609{
     610    int fd;
     611    uint got, max;
     612    ssize_t dict;
     613    off_t end;
     614    z_stream strm;
     615    unsigned char buf[DICT];
     616
     617    /* compress and append compressed data */
     618    if (len) {
     619        /* set up for deflate, allocating memory */
     620        strm.zalloc = Z_NULL;
     621        strm.zfree = Z_NULL;
     622        strm.opaque = Z_NULL;
     623        if (deflateInit2(&strm, Z_DEFAULT_COMPRESSION, Z_DEFLATED, -15, 8,
     624                         Z_DEFAULT_STRATEGY) != Z_OK)
     625            return -2;
     626
     627        /* read in dictionary (last 32K of data that was compressed) */
     628        strcpy(log->end, ".dict");
     629        fd = open(log->path, O_RDONLY, 0);
     630        if (fd >= 0) {
     631            dict = read(fd, buf, DICT);
     632            close(fd);
     633            if (dict < 0) {
     634                deflateEnd(&strm);
     635                return -1;
     636            }
     637            if (dict)
     638                deflateSetDictionary(&strm, buf, (uint)dict);
     639        }
     640        log_touch(log);
     641
     642        /* prime deflate with last bits of previous block, position write
     643           pointer to write those bits and overwrite what follows */
     644        if (lseek(log->fd, log->first - (log->back > 8 ? 2 : 1),
     645                SEEK_SET) < 0 ||
     646            read(log->fd, buf, 1) != 1 || lseek(log->fd, -1, SEEK_CUR) < 0) {
     647            deflateEnd(&strm);
     648            return -1;
     649        }
     650        deflatePrime(&strm, (8 - log->back) & 7, *buf);
     651
     652        /* compress, finishing with a partial non-last empty static block */
     653        strm.next_in = data;
     654        max = (((uint)0 - 1) >> 1) + 1; /* in case int smaller than size_t */
     655        do {
     656            strm.avail_in = len > max ? max : (uint)len;
     657            len -= strm.avail_in;
     658            do {
     659                strm.avail_out = DICT;
     660                strm.next_out = buf;
     661                deflate(&strm, len ? Z_NO_FLUSH : Z_PARTIAL_FLUSH);
     662                got = DICT - strm.avail_out;
     663                if (got && write(log->fd, buf, got) != got) {
     664                    deflateEnd(&strm);
     665                    return -1;
     666                }
     667                log_touch(log);
     668            } while (strm.avail_out == 0);
     669        } while (len);
     670        deflateEnd(&strm);
     671        BAIL(5);
     672
     673        /* find start of empty static block -- scanning backwards the first one
     674           bit is the second bit of the block, if the last byte is zero, then
     675           we know the byte before that has a one in the top bit, since an
     676           empty static block is ten bits long */
     677        if ((log->first = lseek(log->fd, -1, SEEK_CUR)) < 0 ||
     678            read(log->fd, buf, 1) != 1)
     679            return -1;
     680        log->first++;
     681        if (*buf) {
     682            log->back = 1;
     683            while ((*buf & ((uint)1 << (8 - log->back++))) == 0)
     684                ;       /* guaranteed to terminate, since *buf != 0 */
     685        }
     686        else
     687            log->back = 10;
     688
     689        /* update compressed crc and length */
     690        log->ccrc = log->tcrc;
     691        log->clen = log->tlen;
     692    }
     693    else {
     694        /* no data to compress -- fix up existing gzip stream */
     695        log->tcrc = log->ccrc;
     696        log->tlen = log->clen;
     697    }
     698
     699    /* complete and truncate gzip stream */
     700    log->last = log->first;
     701    log->stored = 0;
     702    PUT4(buf, log->tcrc);
     703    PUT4(buf + 4, log->tlen);
     704    if (log_last(log, 1) || write(log->fd, buf, 8) != 8 ||
     705        (end = lseek(log->fd, 0, SEEK_CUR)) < 0 || ftruncate(log->fd, end))
     706        return -1;
     707    BAIL(6);
     708
     709    /* mark as being in the replace operation */
     710    if (log_mark(log, REPLACE_OP))
     711        return -1;
     712
     713    /* execute the replace operation and mark the file as done */
     714    return log_replace(log);
     715}
     716
     717/* log a repair record to the .repairs file */
     718local void log_log(struct log *log, int op, char *record)
     719{
     720    time_t now;
     721    FILE *rec;
     722
     723    now = time(NULL);
     724    strcpy(log->end, ".repairs");
     725    rec = fopen(log->path, "a");
     726    if (rec == NULL)
     727        return;
     728    fprintf(rec, "%.24s %s recovery: %s\n", ctime(&now), op == APPEND_OP ?
     729            "append" : (op == COMPRESS_OP ? "compress" : "replace"), record);
     730    fclose(rec);
     731    return;
     732}
     733
     734/* Recover the interrupted operation op.  First read foo.add for recovering an
     735   append or compress operation.  Return -1 if there was an error reading or
     736   writing foo.gz or reading an existing foo.add, or -2 if there was a memory
     737   allocation failure. */
     738local int log_recover(struct log *log, int op)
     739{
     740    int fd, ret = 0;
     741    unsigned char *data = NULL;
     742    size_t len = 0;
     743    struct stat st;
     744
     745    /* log recovery */
     746    log_log(log, op, "start");
     747
     748    /* load foo.add file if expected and present */
     749    if (op == APPEND_OP || op == COMPRESS_OP) {
     750        strcpy(log->end, ".add");
     751        if (stat(log->path, &st) == 0 && st.st_size) {
     752            len = (size_t)(st.st_size);
     753            if (len != st.st_size || (data = malloc(st.st_size)) == NULL) {
     754                log_log(log, op, "allocation failure");
     755                return -2;
     756            }
     757            if ((fd = open(log->path, O_RDONLY, 0)) < 0) {
     758                log_log(log, op, ".add file read failure");
     759                return -1;
     760            }
     761            ret = read(fd, data, len) != len;
     762            close(fd);
     763            if (ret) {
     764                log_log(log, op, ".add file read failure");
     765                return -1;
     766            }
     767            log_log(log, op, "loaded .add file");
     768        }
     769        else
     770            log_log(log, op, "missing .add file!");
     771    }
     772
     773    /* recover the interrupted operation */
     774    switch (op) {
     775    case APPEND_OP:
     776        ret = log_append(log, data, len);
     777        break;
     778    case COMPRESS_OP:
     779        ret = log_compress(log, data, len);
     780        break;
     781    case REPLACE_OP:
     782        ret = log_replace(log);
     783    }
     784
     785    /* log status */
     786    log_log(log, op, ret ? "failure" : "complete");
     787
     788    /* clean up */
     789    if (data != NULL)
     790        free(data);
     791    return ret;
     792}
     793
     794/* Close the foo.gz file (if open) and release the lock. */
     795local void log_close(struct log *log)
     796{
     797    if (log->fd >= 0)
     798        close(log->fd);
     799    log->fd = -1;
     800    log_unlock(log);
     801}
     802
     803/* Open foo.gz, verify the header, and load the extra field contents, after
     804   first creating the foo.lock file to gain exclusive access to the foo.*
     805   files.  If foo.gz does not exist or is empty, then write the initial header,
     806   extra, and body content of an empty foo.gz log file.  If there is an error
     807   creating the lock file due to access restrictions, or an error reading or
     808   writing the foo.gz file, or if the foo.gz file is not a proper log file for
     809   this object (e.g. not a gzip file or does not contain the expected extra
     810   field), then return true.  If there is an error, the lock is released.
     811   Otherwise, the lock is left in place. */
     812local int log_open(struct log *log)
     813{
     814    int op;
     815
     816    /* release open file resource if left over -- can occur if lock lost
     817       between gzlog_open() and gzlog_write() */
     818    if (log->fd >= 0)
     819        close(log->fd);
     820    log->fd = -1;
     821
     822    /* negotiate exclusive access */
     823    if (log_lock(log) < 0)
     824        return -1;
     825
     826    /* open the log file, foo.gz */
     827    strcpy(log->end, ".gz");
     828    log->fd = open(log->path, O_RDWR | O_CREAT, 0644);
     829    if (log->fd < 0) {
     830        log_close(log);
     831        return -1;
     832    }
     833
     834    /* if new, initialize foo.gz with an empty log, delete old dictionary */
     835    if (lseek(log->fd, 0, SEEK_END) == 0) {
     836        if (write(log->fd, log_gzhead, HEAD) != HEAD ||
     837            write(log->fd, log_gzext, EXTRA) != EXTRA ||
     838            write(log->fd, log_gzbody, BODY) != BODY) {
     839            log_close(log);
     840            return -1;
     841        }
     842        strcpy(log->end, ".dict");
     843        unlink(log->path);
     844    }
     845
     846    /* verify log file and load extra field information */
     847    if ((op = log_head(log)) < 0) {
     848        log_close(log);
     849        return -1;
     850    }
     851
     852    /* check for interrupted process and if so, recover */
     853    if (op != NO_OP && log_recover(log, op)) {
     854        log_close(log);
     855        return -1;
     856    }
     857
     858    /* touch the lock file to prevent another process from grabbing it */
     859    log_touch(log);
     860    return 0;
     861}
     862
     863/* See gzlog.h for the description of the external methods below */
     864gzlog *gzlog_open(char *path)
     865{
     866    size_t n;
     867    struct log *log;
     868
     869    /* check arguments */
     870    if (path == NULL || *path == 0)
     871        return NULL;
     872
     873    /* allocate and initialize log structure */
     874    log = malloc(sizeof(struct log));
    142875    if (log == NULL)
    143876        return NULL;
    144     log->id = GZLOGID;
    145 
    146     /* open file, creating it if necessary, and locking it */
    147     log->fd = open(path, O_RDWR | O_CREAT, 0600);
    148     if (log->fd < 0) {
     877    strcpy(log->id, LOGID);
     878    log->fd = -1;
     879
     880    /* save path and end of path for name construction */
     881    n = strlen(path);
     882    log->path = malloc(n + 9);              /* allow for ".repairs" */
     883    if (log->path == NULL) {
    149884        free(log);
    150885        return NULL;
    151886    }
    152     if (lock(log->fd)) {
    153         close(log->fd);
     887    strcpy(log->path, path);
     888    log->end = log->path + n;
     889
     890    /* gain exclusive access and verify log file -- may perform a
     891       recovery operation if needed */
     892    if (log_open(log)) {
     893        free(log->path);
    154894        free(log);
    155895        return NULL;
    156896    }
    157897
    158     /* if file is empty, write new gzip stream */
    159     if (lseek(log->fd, 0, SEEK_END) == 0) {
    160         if (write(log->fd, empty_gz, sizeof(empty_gz)) != sizeof(empty_gz)) {
    161             log_clean(log);
    162             return NULL;
     898    /* return pointer to log structure */
     899    return log;
     900}
     901
     902/* gzlog_compress() return values:
     903    0: all good
     904   -1: file i/o error (usually access issue)
     905   -2: memory allocation failure
     906   -3: invalid log pointer argument */
     907int gzlog_compress(gzlog *logd)
     908{
     909    int fd, ret;
     910    uint block;
     911    size_t len, next;
     912    unsigned char *data, buf[5];
     913    struct log *log = logd;
     914
     915    /* check arguments */
     916    if (log == NULL || strcmp(log->id, LOGID) || len < 0)
     917        return -3;
     918
     919    /* see if we lost the lock -- if so get it again and reload the extra
     920       field information (it probably changed), recover last operation if
     921       necessary */
     922    if (log_check(log) && log_open(log))
     923        return -1;
     924
     925    /* create space for uncompressed data */
     926    len = ((size_t)(log->last - log->first) & ~(((size_t)1 << 10) - 1)) +
     927          log->stored;
     928    if ((data = malloc(len)) == NULL)
     929        return -2;
     930
     931    /* do statement here is just a cheap trick for error handling */
     932    do {
     933        /* read in the uncompressed data */
     934        if (lseek(log->fd, log->first - 1, SEEK_SET) < 0)
     935            break;
     936        next = 0;
     937        while (next < len) {
     938            if (read(log->fd, buf, 5) != 5)
     939                break;
     940            block = PULL2(buf + 1);
     941            if (next + block > len ||
     942                read(log->fd, (char *)data + next, block) != block)
     943                break;
     944            next += block;
    163945        }
    164     }
    165 
    166     /* check gzip header */
    167     (void)lseek(log->fd, 0, SEEK_SET);
    168     if (read(log->fd, temp, 12) != 12 || temp[0] != 0x1f ||
    169         temp[1] != 0x8b || temp[2] != 8 || (temp[3] & 4) == 0) {
    170         log_clean(log);
    171         return NULL;
    172     }
    173 
    174     /* process extra field to find "ap" sub-field */
    175     xlen = temp[10] + (temp[11] << 8);
    176     good = 0;
    177     while (xlen) {
    178         if (xlen < 4 || read(log->fd, temp, 4) != 4)
     946        if (lseek(log->fd, 0, SEEK_CUR) != log->last + 4 + log->stored)
    179947            break;
    180         sub_len = temp[2];
    181         sub_len += temp[3] << 8;
    182         xlen -= 4;
    183         if (memcmp(temp, "ap", 2) == 0 && sub_len == 16) {
    184             good = 1;
     948        log_touch(log);
     949
     950        /* write the uncompressed data to the .add file */
     951        strcpy(log->end, ".add");
     952        fd = open(log->path, O_WRONLY | O_CREAT | O_TRUNC, 0644);
     953        if (fd < 0)
    185954            break;
    186         }
    187         if (xlen < sub_len)
     955        ret = write(fd, data, len) != len;
     956        if (ret | close(fd))
    188957            break;
    189         (void)lseek(log->fd, sub_len, SEEK_CUR);
    190         xlen -= sub_len;
    191     }
    192     if (!good) {
    193         log_clean(log);
    194         return NULL;
    195     }
    196 
    197     /* read in "ap" sub-field */
    198     log->extra = lseek(log->fd, 0, SEEK_CUR);
    199     if (read(log->fd, temp, 16) != 16) {
    200         log_clean(log);
    201         return NULL;
    202     }
    203     log->mark_off = make_off(temp);
    204     log->last_off = make_off(temp + 8);
    205 
    206     /* get crc, length of gzip file */
    207     (void)lseek(log->fd, log->last_off, SEEK_SET);
    208     if (read(log->fd, temp, 13) != 13 ||
    209         memcmp(temp, "\001\000\000\377\377", 5) != 0) {
    210         log_clean(log);
    211         return NULL;
    212     }
    213     log->crc = make_ulg(temp + 5);
    214     log->len = make_ulg(temp + 9);
    215 
    216     /* set up to write over empty last block */
    217     (void)lseek(log->fd, log->last_off + 5, SEEK_SET);
    218     log->stored = 0;
    219     return (void *)log;
    220 }
    221 
    222 /* maximum amount to put in a stored block before starting a new one */
    223 #define MAX_BLOCK 16384
    224 
    225 /* write a block to a log object */
    226 int gzlog_write(void *obj, char *data, size_t len)
    227 {
    228     size_t some;
    229     unsigned char temp[5];
    230     gz_log *log;
    231 
    232     /* check object */
    233     log = (gz_log *)obj;
    234     if (log == NULL || log->id != GZLOGID)
    235         return 1;
    236 
    237     /* write stored blocks until all of the input is written */
    238     do {
    239         some = MAX_BLOCK - log->stored;
    240         if (some > len)
    241             some = len;
    242         if (write(log->fd, data, some) != some)
    243             return 1;
    244         log->crc = crc32(log->crc, data, some);
    245         log->len += some;
    246         len -= some;
    247         data += some;
    248         log->stored += some;
    249 
    250         /* if the stored block is full, end it and start another */
    251         if (log->stored == MAX_BLOCK) {
    252             (void)lseek(log->fd, log->last_off, SEEK_SET);
    253             temp[0] = 0;
    254             dice_ulg(log->stored + ((unsigned long)(~log->stored) << 16),
    255                      temp + 1);
    256             if (write(log->fd, temp, 5) != 5)
    257                 return 1;
    258             log->last_off = lseek(log->fd, log->stored, SEEK_CUR);
    259             (void)lseek(log->fd, 5, SEEK_CUR);
    260             log->stored = 0;
    261         }
    262     } while (len);
     958        log_touch(log);
     959
     960        /* write the dictionary for the next compress to the .temp file */
     961        strcpy(log->end, ".temp");
     962        fd = open(log->path, O_WRONLY | O_CREAT | O_TRUNC, 0644);
     963        if (fd < 0)
     964            break;
     965        next = DICT > len ? len : DICT;
     966        ret = write(fd, (char *)data + len - next, next) != next;
     967        if (ret | close(fd))
     968            break;
     969        log_touch(log);
     970
     971        /* roll back to compressed data, mark the compress in progress */
     972        log->last = log->first;
     973        log->stored = 0;
     974        if (log_mark(log, COMPRESS_OP))
     975            break;
     976        BAIL(7);
     977
     978        /* compress and append the data (clears mark) */
     979        ret = log_compress(log, data, len);
     980        free(data);
     981        return ret;
     982    } while (0);
     983
     984    /* broke out of do above on i/o error */
     985    free(data);
     986    return -1;
     987}
     988
     989/* gzlog_write() return values:
     990    0: all good
     991   -1: file i/o error (usually access issue)
     992   -2: memory allocation failure
     993   -3: invalid log pointer argument */
     994int gzlog_write(gzlog *logd, void *data, size_t len)
     995{
     996    int fd, ret;
     997    struct log *log = logd;
     998
     999    /* check arguments */
     1000    if (log == NULL || strcmp(log->id, LOGID) || len < 0)
     1001        return -3;
     1002    if (data == NULL || len == 0)
     1003        return 0;
     1004
     1005    /* see if we lost the lock -- if so get it again and reload the extra
     1006       field information (it probably changed), recover last operation if
     1007       necessary */
     1008    if (log_check(log) && log_open(log))
     1009        return -1;
     1010
     1011    /* create and write .add file */
     1012    strcpy(log->end, ".add");
     1013    fd = open(log->path, O_WRONLY | O_CREAT | O_TRUNC, 0644);
     1014    if (fd < 0)
     1015        return -1;
     1016    ret = write(fd, data, len) != len;
     1017    if (ret | close(fd))
     1018        return -1;
     1019    log_touch(log);
     1020
     1021    /* mark log file with append in progress */
     1022    if (log_mark(log, APPEND_OP))
     1023        return -1;
     1024    BAIL(8);
     1025
     1026    /* append data (clears mark) */
     1027    if (log_append(log, data, len))
     1028        return -1;
     1029
     1030    /* check to see if it's time to compress -- if not, then done */
     1031    if (((log->last - log->first) >> 10) + (log->stored >> 10) < TRIGGER)
     1032        return 0;
     1033
     1034    /* time to compress */
     1035    return gzlog_compress(log);
     1036}
     1037
     1038/* gzlog_close() return values:
     1039    0: ok
     1040   -3: invalid log pointer argument */
     1041int gzlog_close(gzlog *logd)
     1042{
     1043    struct log *log = logd;
     1044
     1045    /* check arguments */
     1046    if (log == NULL || strcmp(log->id, LOGID))
     1047        return -3;
     1048
     1049    /* close the log file and release the lock */
     1050    log_close(log);
     1051
     1052    /* free structure and return */
     1053    if (log->path != NULL)
     1054        free(log->path);
     1055    strcpy(log->id, "bad");
     1056    free(log);
    2631057    return 0;
    2641058}
    265 
    266 /* recompress the remaining stored deflate data in place */
    267 local int recomp(gz_log *log)
    268 {
    269     z_stream strm;
    270     size_t len, max;
    271     unsigned char *in;
    272     unsigned char *out;
    273     unsigned char temp[16];
    274 
    275     /* allocate space and read it all in (it's around 1 MB) */
    276     len = log->last_off - log->mark_off;
    277     max = len + (len >> 12) + (len >> 14) + 11;
    278     out = malloc(max);
    279     if (out == NULL)
    280         return 1;
    281     in = malloc(len);
    282     if (in == NULL) {
    283         free(out);
    284         return 1;
    285     }
    286     (void)lseek(log->fd, log->mark_off, SEEK_SET);
    287     if (read(log->fd, in, len) != len) {
    288         free(in);
    289         free(out);
    290         return 1;
    291     }
    292 
    293     /* recompress in memory, decoding stored data as we go */
    294     /* note: this assumes that unsigned is four bytes or more */
    295     /*       consider not making that assumption */
    296     strm.zalloc = Z_NULL;
    297     strm.zfree = Z_NULL;
    298     strm.opaque = Z_NULL;
    299     if (deflateInit2(&strm, Z_BEST_COMPRESSION, Z_DEFLATED, -15, 8,
    300         Z_DEFAULT_STRATEGY) != Z_OK) {
    301         free(in);
    302         free(out);
    303         return 1;
    304     }
    305     strm.next_in = in;
    306     strm.avail_out = max;
    307     strm.next_out = out;
    308     while (len >= 5) {
    309         if (strm.next_in[0] != 0)
    310             break;
    311         strm.avail_in = strm.next_in[1] + (strm.next_in[2] << 8);
    312         strm.next_in += 5;
    313         len -= 5;
    314         if (strm.avail_in != 0) {
    315             if (len < strm.avail_in)
    316                 break;
    317             len -= strm.avail_in;
    318             (void)deflate(&strm, Z_NO_FLUSH);
    319             if (strm.avail_in != 0 || strm.avail_out == 0)
    320                 break;
    321         }
    322     }
    323     (void)deflate(&strm, Z_SYNC_FLUSH);
    324     (void)deflateEnd(&strm);
    325     free(in);
    326     if (len != 0 || strm.avail_out == 0) {
    327         free(out);
    328         return 1;
    329     }
    330 
    331     /* overwrite stored data with compressed data */
    332     (void)lseek(log->fd, log->mark_off, SEEK_SET);
    333     len = max - strm.avail_out;
    334     if (write(log->fd, out, len) != len) {
    335         free(out);
    336         return 1;
    337     }
    338     free(out);
    339 
    340     /* write last empty block, crc, and length */
    341     log->mark_off = log->last_off = lseek(log->fd, 0, SEEK_CUR);
    342     temp[0] = 1;
    343     dice_ulg(0xffffL << 16, temp + 1);
    344     dice_ulg(log->crc, temp + 5);
    345     dice_ulg(log->len, temp + 9);
    346     if (write(log->fd, temp, 13) != 13)
    347         return 1;
    348 
    349     /* truncate file to discard remaining stored data and old trailer */
    350     ftruncate(log->fd, lseek(log->fd, 0, SEEK_CUR));
    351 
    352     /* update extra field to point to new last empty block */
    353     (void)lseek(log->fd, log->extra, SEEK_SET);
    354     dice_off(log->mark_off, temp);
    355     dice_off(log->last_off, temp + 8);
    356     if (write(log->fd, temp, 16) != 16)
    357         return 1;
    358     return 0;
    359 }
    360 
    361 /* maximum accumulation of stored blocks before compressing */
    362 #define MAX_STORED 1048576
    363 
    364 /* close log object */
    365 int gzlog_close(void *obj)
    366 {
    367     unsigned char temp[8];
    368     gz_log *log;
    369 
    370     /* check object */
    371     log = (gz_log *)obj;
    372     if (log == NULL || log->id != GZLOGID)
    373         return 1;
    374 
    375     /* go to start of most recent block being written */
    376     (void)lseek(log->fd, log->last_off, SEEK_SET);
    377 
    378     /* if some stuff was put there, update block */
    379     if (log->stored) {
    380         temp[0] = 0;
    381         dice_ulg(log->stored + ((unsigned long)(~log->stored) << 16),
    382                  temp + 1);
    383         if (write(log->fd, temp, 5) != 5)
    384             return 1;
    385         log->last_off = lseek(log->fd, log->stored, SEEK_CUR);
    386     }
    387 
    388     /* write last block (empty) */
    389     if (write(log->fd, "\001\000\000\377\377", 5) != 5)
    390         return 1;
    391 
    392     /* write updated crc and uncompressed length */
    393     dice_ulg(log->crc, temp);
    394     dice_ulg(log->len, temp + 4);
    395     if (write(log->fd, temp, 8) != 8)
    396         return 1;
    397 
    398     /* put offset of that last block in gzip extra block */
    399     (void)lseek(log->fd, log->extra + 8, SEEK_SET);
    400     dice_off(log->last_off, temp);
    401     if (write(log->fd, temp, 8) != 8)
    402         return 1;
    403 
    404     /* if more than 1 MB stored, then time to compress it */
    405     if (log->last_off - log->mark_off > MAX_STORED) {
    406         if (recomp(log))
    407             return 1;
    408     }
    409 
    410     /* unlock and close file */
    411     log_clean(log);
    412     return 0;
    413 }
Note: See TracChangeset for help on using the changeset viewer.