lib/date.c

   1 /*  $Id: date.c 7136 2005-03-11 19:18:27Z rra $
   2 **
   3 **  Date parsing and conversion routines.
   4 **
   5 **  Provides various date parsing and conversion routines, including
   6 **  generating Date headers for posted articles.  Note that the parsedate
   7 **  parser is separate from this file.
   8 */
   9
  10 #include "config.h"
  11 #include "clibrary.h"
  12 #include <ctype.h>
  13 #include <time.h>
  14
  15 #include "libinn.h"
  16
  /*
  **  Fixed English names for weekdays and months.
  **
  **  These names must never be translated: RFC 822, by way of RFC 1036,
  **  requires the English abbreviations in dates.  Dates are therefore built
  **  from these static tables rather than with strftime, whose output depends
  **  on the locale.  Each entry is three letters plus the terminating nul.
  */

  /* Weekday abbreviations, indexed by tm_wday (0 is Sunday). */
  static const char WEEKDAY[7][4] = {
      "Sun",
      "Mon",
      "Tue",
      "Wed",
      "Thu",
      "Fri",
      "Sat"
  };

  /* Month abbreviations, indexed by tm_mon (0 is January). */
  static const char MONTH[12][4] = {
      "Jan",
      "Feb",
      "Mar",
      "Apr",
      "May",
      "Jun",
      "Jul",
      "Aug",
      "Sep",
      "Oct",
      "Nov",
      "Dec"
  };
  34
  /* Length in days of each month in a non-leap year, indexed by tm_mon
     (0 is January).  Code that needs leap years adds the extra February day
     itself. */
  static const int MONTHDAYS[] = {
      31,     /* Jan */
      28,     /* Feb */
      31,     /* Mar */
      30,     /* Apr */
      31,     /* May */
      30,     /* Jun */
      31,     /* Jul */
      31,     /* Aug */
      30,     /* Sep */
      31,     /* Oct */
      30,     /* Nov */
      31      /* Dec */
  };
  39
  /* Alphabetic time zone names and their offsets from UTC in seconds.
     Recognizing these is required to support the obsolete date format of
     RFC 2822.  The single-letter military zones are not listed here; they
     are handled separately. */
  static const struct {
      const char name[4];
      long offset;
  } ZONE_OFFSET[] = {
      { "UT",   0 },
      { "GMT",  0 },
      { "EDT", -4 * 60 * 60 },
      { "EST", -5 * 60 * 60 },
      { "CDT", -5 * 60 * 60 },
      { "CST", -6 * 60 * 60 },
      { "MDT", -6 * 60 * 60 },
      { "MST", -7 * 60 * 60 },
      { "PDT", -7 * 60 * 60 },
      { "PST", -8 * 60 * 60 },
  };
  53
  54
  /*
  **  Time parsing macros.
  */

  /* Evaluates to 1 if year is a leap year in the Gregorian calendar and to 0
     otherwise: years divisible by 400, or divisible by 4 but not by 100.
     The argument is expanded more than once, so it must not have side
     effects. */
  #define ISLEAP(year) \
      ((((year) % 400) == 0) || ((((year) % 4) == 0) && (((year) % 100) != 0)))
  62
  63
  /*
  **  RFC 2822 date parsing rules.
  */

  /* The kinds of parsing step that a rule can describe. */
  enum rule_type {
      TYPE_NUMBER,            /* Read digits and convert them to a number. */
      TYPE_LOOKUP,            /* Find a fixed-width string in a table. */
      TYPE_DELIM              /* Require one specific character. */
  };

  /* A single parsing rule.  Which of the remaining fields are meaningful
     depends on type:

       TYPE_NUMBER  Read between min and max characters and convert them to
                    a number.
       TYPE_LOOKUP  Take max characters and find that string in table, which
                    has size elements.
       TYPE_DELIM   Just make sure the next character is delimiter.

     The field order matters, since rules are written as positional
     initializers. */
  struct rule {
      enum rule_type type;
      char delimiter;
      const char (*table)[4];
      size_t size;
      int min;
      int max;
  };
  85
  86
  87 /*
  88 **  Given a time as a time_t, return the offset in seconds of the local time
  89 **  zone from UTC at that time (adding the offset to UTC time yields local
  90 **  time).  If the second argument is true, the time represents the current
  91 **  time and in that circumstance we can assume that timezone/altzone are
  92 **  correct.  (We can't for arbitrary times in the past.)
  93 */
  94 static long
  95 local_tz_offset(time_t date, bool current UNUSED)
  96 {
  97     struct tm *tm;
  98 #if !HAVE_TM_GMTOFF
  99     struct tm local, gmt;
 100     long offset;
 101 #endif
 102
 103     tm = localtime(&date);
 104
 105 #if !HAVE_TM_GMTOFF && HAVE_VAR_TIMEZONE
 106     if (current)
 107         return (tm->tm_isdst > 0) ? -altzone : -timezone;
 108 #endif
 109
 110 #if HAVE_TM_GMTOFF
 111     return tm->tm_gmtoff;
 112 #else
 113     /* We don't have any easy returnable value, so we call both localtime
 114        and gmtime and calculate the difference.  Assume that local time is
 115        never more than 24 hours away from UTC and ignore seconds. */
 116     local = *tm;
 117     tm = gmtime(&date);
 118     gmt = *tm;
 119     offset = local.tm_yday - gmt.tm_yday;
 120     if (offset < -1) {
 121         /* Local time is in the next year. */
 122         offset = 24;
 123     } else if (offset > 1) {
 124         /* Local time is in the previous year. */
 125         offset = -24;
 126     } else {
 127         offset *= 24;
 128     }
 129     offset += local.tm_hour - gmt.tm_hour;
 130     offset *= 60;
 131     offset += local.tm_min - gmt.tm_min;
 132     return offset * 60;
 133 #endif /* !HAVE_TM_GMTOFF */
 134 }
 135
 136
 137 /*
 138 **  Given a time_t, a flag saying whether to use local time, a buffer, and
 139 **  the length of the buffer, write the contents of a valid RFC 2822 / RFC
 140 **  1036 Date header into the buffer (provided it's long enough).  Returns
 141 **  true on success, false if the buffer is too long.  Use snprintf rather
 142 **  than strftime to be absolutely certain that locales don't result in the
 143 **  wrong output.  If the time is -1, obtain and use the current time.
 144 */
 145 bool
 146 makedate(time_t date, bool local, char *buff, size_t buflen)
 147 {
 148     time_t realdate;
 149     struct tm *tmp_tm;
 150     struct tm tm;
 151     long tz_offset;
 152     int tz_hour_offset, tz_min_offset, tz_sign;
 153     size_t date_length;
 154     const char *tz_name;
 155
 156     /* Make sure the buffer is large enough.  A complete RFC 2822 date with
 157        spaces wherever FWS is required and the optional weekday takes:
 158
 159                     1         2         3
 160            1234567890123456789012345678901
 161            Sat, 31 Aug 2002 23:45:18 +0000
 162
 163        31 characters, plus another character for the trailing nul.  The buffer
 164        will need to have another six characters of space to get the optional
 165        trailing time zone comment. */
 166     if (buflen < 32)
 167         return false;
 168
 169     /* Get the current time if the provided time is -1. */
 170     realdate = (date == (time_t) -1) ? time(NULL) : date;
 171
 172     /* RFC 2822 says the timezone offset is given as [+-]HHMM, so we have to
 173        separate the offset into a sign, hours, and minutes.  Dividing the
 174        offset by 36 looks like it works, but will fail for any offset that
 175        isn't an even number of hours, and there are half-hour timezones. */
 176     if (local) {
 177         tmp_tm = localtime(&realdate);
 178         tm = *tmp_tm;
 179         tz_offset = local_tz_offset(realdate, date == (time_t) -1);
 180         tz_sign = (tz_offset < 0) ? -1 : 1;
 181         tz_offset *= tz_sign;
 182         tz_hour_offset = tz_offset / 3600;
 183         tz_min_offset = (tz_offset % 3600) / 60;
 184     } else {
 185         tmp_tm = gmtime(&realdate);
 186         tm = *tmp_tm;
 187         tz_sign = 1;
 188         tz_hour_offset = 0;
 189         tz_min_offset = 0;
 190     }
 191
 192     /* tz_min_offset cannot be larger than 60 (by basic mathematics).  If
 193        through some insane circumtances, tz_hour_offset would be larger,
 194        reject the time as invalid rather than generate an invalid date. */
 195     if (tz_hour_offset > 24)
 196         return false;
 197
 198     /* Generate the actual date string, sans the trailing time zone comment
 199        but with the day of the week and the seconds (both of which are
 200        optional in the standard). */
 201     snprintf(buff, buflen, "%3.3s, %d %3.3s %d %02d:%02d:%02d %c%02d%02d",
 202              &WEEKDAY[tm.tm_wday][0], tm.tm_mday, &MONTH[tm.tm_mon][0],
 203              1900 + tm.tm_year, tm.tm_hour, tm.tm_min, tm.tm_sec,
 204              (tz_sign > 0) ? '+' : '-', tz_hour_offset, tz_min_offset);
 205     date_length = strlen(buff);
 206
 207     /* Now, get a pointer to the time zone abbreviation, and if there is
 208        enough room in the buffer, add it to the end of the date string as a
 209        comment. */
 210     if (!local) {
 211         tz_name = "UTC";
 212     } else {
 213 #if HAVE_TM_ZONE
 214         tz_name = tm.tm_zone;
 215 #elif HAVE_VAR_TZNAME
 216         tz_name = tzname[(tm.tm_isdst > 0) ? 1 : 0];
 217 #else
 218         tz_name = NULL;
 219 #endif
 220     }
 221     if (tz_name != NULL && date_length + 4 + strlen(tz_name) <= buflen) {
 222         snprintf(buff + date_length, buflen - date_length, " (%s)", tz_name);
 223     }
 224     return true;
 225 }
 226
 227
 228 /*
 229 **  Given a struct tm representing a calendar time in UTC, convert it to
 230 **  seconds since epoch.  Returns (time_t) -1 if the time is not
 231 **  convertable.  Note that this function does not canonicalize the provided
 232 **  struct tm, nor does it allow out of range values or years before 1970.
 233 */
 234 static time_t
 235 mktime_utc(const struct tm *tm)
 236 {
 237     time_t result = 0;
 238     int i;
 239
 240     /* We do allow some ill-formed dates, but we don't do anything special
 241        with them and our callers really shouldn't pass them to us.  Do
 242        explicitly disallow the ones that would cause invalid array accesses
 243        or other algorithm problems. */
 244     if (tm->tm_mon < 0 || tm->tm_mon > 11 || tm->tm_year < 70)
 245         return (time_t) -1;
 246
 247     /* Convert to a time_t. */
 248     for (i = 1970; i < tm->tm_year + 1900; i++)
 249         result += 365 + ISLEAP(i);
 250     for (i = 0; i < tm->tm_mon; i++)
 251         result += MONTHDAYS[i];
 252     if (tm->tm_mon > 1 && ISLEAP(tm->tm_year + 1900))
 253         result++;
 254     result = 24 * (result + tm->tm_mday - 1) + tm->tm_hour;
 255     result = 60 * result + tm->tm_min;
 256     result = 60 * result + tm->tm_sec;
 257     return result;
 258 }
 259
 260
 261 /*
 262 **  Check the ranges of values in a struct tm to make sure that the date was
 263 **  well-formed.  Assumes that the year has already been correctly set to
 264 **  something (but may be before 1970).
 265 */
 266 static bool
 267 valid_tm(const struct tm *tm)
 268 {
 269     if (tm->tm_sec > 60 || tm->tm_min > 59 || tm->tm_hour > 23)
 270         return false;
 271     if (tm->tm_mday < 1 || tm->tm_mon < 0 || tm->tm_mon > 11)
 272         return false;
 273
 274     /* Make sure that the day isn't past the end of the month, allowing for
 275        leap years. */
 276     if (tm->tm_mday > MONTHDAYS[tm->tm_mon]
 277         && (tm->tm_mon != 1 || tm->tm_mday > 29
 278             || !ISLEAP(tm->tm_year + 1900)))
 279         return false;
 280
 281     /* We can't handle years before 1970. */
 282     if (tm->tm_year < 70)
 283         return false;
 284
 285     return true;
 286 }
 287
 288
 /* Convert the two decimal digits at s to their numeric value.  The caller
    must already have verified that both characters are digits. */
 static int
 nntp_two_digits(const char *s)
 {
     return (s[0] - '0') * 10 + (s[1] - '0');
 }


 /*
 **  Parse a date in the format used in NNTP commands such as NEWGROUPS and
 **  NEWNEWS.  The first argument is a string of the form YYMMDD or YYYYMMDD
 **  and the second a string of the form HHMMSS.  The third argument says
 **  whether the date is given in local time; if false, it is taken to be in
 **  UTC.  Returns the corresponding time_t, or (time_t) -1 on error.
 */
 time_t
 parsedate_nntp(const char *date, const char *hour, bool local)
 {
     const char *digit;
     const char *ymd;
     size_t datelen;
     time_t now;
     struct tm tm;
     struct tm *current;
     int century;

     /* Accept YYMMDD and YYYYMMDD.  The first is what RFC 977 requires.  The
        second is what the revision of RFC 977 will require.  Both strings
        must consist entirely of digits. */
     datelen = strlen(date);
     if (datelen != 6 && datelen != 8)
         return (time_t) -1;
     if (strlen(hour) != 6)
         return (time_t) -1;
     digit = date;
     while (*digit != '\0') {
         if (!CTYPE(isdigit, *digit))
             return (time_t) -1;
         digit++;
     }
     digit = hour;
     while (*digit != '\0') {
         if (!CTYPE(isdigit, *digit))
             return (time_t) -1;
         digit++;
     }

     /* Fill in the struct tm from the last six digits of the date, leaving
        any century digits for the next step, and from the time. */
     ymd = date + datelen - 6;
     tm.tm_isdst = -1;
     tm.tm_year = nntp_two_digits(ymd);
     tm.tm_mon  = nntp_two_digits(ymd + 2) - 1;
     tm.tm_mday = nntp_two_digits(ymd + 4);
     tm.tm_hour = nntp_two_digits(hour);
     tm.tm_min  = nntp_two_digits(hour + 2);
     tm.tm_sec  = nntp_two_digits(hour + 4);

     /* Four-digit years are the easy case: add in the century digits and
        convert to years since 1900.

        For two-digit years, RFC 977 says "The closest century is assumed as
        part of the year (i.e., 86 specifies 1986, 30 specifies 2030, 99 is
        1999, 00 is 2000)."  draft-ietf-nntpext-base-10.txt simplifies this
        considerably and is what we implement:

          If the first two digits of the year are not specified, the year is
          to be taken from the current century if YY is smaller than or equal
          to the current year, otherwise the year is from the previous
          century.

        "Current year" is taken to mean the last two digits of the current
        year.  This interacts poorly with clients whose clock runs slightly
        fast around the turn of a century: they may send 00 while the server
        is still in xx99 and have it taken to be 99 years in the past.  But
        2000 has come and gone, and by 2100 news clients *really* should have
        started using UTC for everything like the new draft recommends. */
     if (datelen == 8) {
         tm.tm_year += nntp_two_digits(date) * 100 - 1900;
     } else {
         now = time(NULL);
         current = local ? localtime(&now) : gmtime(&now);
         century = current->tm_year / 100;
         if (tm.tm_year > current->tm_year % 100)
             century--;
         tm.tm_year += century * 100;
     }

     /* Ensure that all of the date components are within valid ranges. */
     if (!valid_tm(&tm))
         return (time_t) -1;

     /* mktime assumes that the struct tm is in the local time zone, so a
        date given in UTC has to go through our own routine instead. */
     if (local)
         return mktime(&tm);
     return mktime_utc(&tm);
 }
 372
 373
 /*
 **  Skip any amount of CFWS (comments and folding whitespace), the RFC 2822
 **  grammar term for whitespace, CRLF pairs, and possibly nested comments
 **  that may contain escaped parens.  Bare newlines are accepted as well,
 **  since we don't always deal with wire-format messages.  No attempt is made
 **  to check that CRLF or a newline is followed by whitespace.
 **
 **  Returns a pointer to the first character that is not part of CFWS, or to
 **  the terminating nul if the string ends first (even in the middle of a
 **  comment).
 */
 static const char *
 skip_cfws(const char *p)
 {
     int depth = 0;              /* Number of comments we are inside. */
     char c;

     while (*p != '\0') {
         c = *p;
         if (c == ' ' || c == '\t' || c == '\n') {
             /* Plain whitespace is always skipped. */
         } else if (c == '\r') {
             /* A carriage return only counts as part of a CRLF pair. */
             if (p[1] != '\n')
                 return p;
             p++;
         } else if (c == '(') {
             depth++;
         } else if (c == ')') {
             /* An unmatched closing paren is not CFWS. */
             if (depth == 0)
                 return p;
             depth--;
         } else if (c == '\\') {
             /* A backslash is only meaningful inside a comment, where it
                quotes the following character. */
             if (depth == 0 || p[1] == '\0')
                 return p;
             p++;
         } else if (depth == 0) {
             /* Anything else outside of a comment ends the CFWS. */
             return p;
         }
         p++;
     }
     return p;
 }
 419
 420
 421 /*
 422 **  Parse a single number.  Takes the parsing rule that we're applying and
 423 **  returns a pointer to the new position of the parse stream.  If there
 424 **  aren't enough digits, return NULL.
 425 */
 426 static const char *
 427 parse_number(const char *p, const struct rule *rule, int *value)
 428 {
 429     int count;
 430
 431     *value = 0;
 432     for (count = 0; *p != '\0' && count < rule->max; p++, count++) {
 433         if (*p < '0' || *p > '9')
 434             break;
 435         *value = *value * 10 + (*p - '0');
 436     }
 437     if (count < rule->min || count > rule->max)
 438         return NULL;
 439     return p;
 440 }
 441
 442
 443 /*
 444 **  Parse a single string value that has to be done via table lookup.  Takes
 445 **  the parsing rule that we're applying.  Puts the index number of the string
 446 **  if found into the value pointerand returns the new position of the string,
 447 **  or NULL if the string could not be found in the table.
 448 */
 449 static const char *
 450 parse_lookup(const char *p, const struct rule *rule, int *value)
 451 {
 452     size_t i;
 453
 454     for (i = 0; i < rule->size; i++)
 455         if (strncasecmp(rule->table[i], p, rule->max) == 0) {
 456             p += rule->max;
 457             *value = i;
 458             return p;
 459         }
 460     return NULL;
 461 }
 462
 463
 464 /*
 465 **  Apply a set of date parsing rules to a string.  Returns the new position
 466 **  in the parse string if this succeeds and NULL if it fails.  As part of the
 467 **  parse, stores values into the value pointer in the array of rules that was
 468 **  passed in.  Takes an array of rules and a count of rules in that array.
 469 */
 470 static const char *
 471 parse_by_rule(const char *p, const struct rule rules[], size_t count,
 472               int *values)
 473 {
 474     size_t i;
 475     const struct rule *rule;
 476
 477     for (i = 0; i < count; i++) {
 478         rule = &rules[i];
 479
 480         switch (rule->type) {
 481         case TYPE_DELIM:
 482             if (*p != rule->delimiter)
 483                 return NULL;
 484             p++;
 485             break;
 486         case TYPE_LOOKUP:
 487             p = parse_lookup(p, rule, &values[i]);
 488             if (p == NULL)
 489                 return NULL;
 490             break;
 491         case TYPE_NUMBER:
 492             p = parse_number(p, rule, &values[i]);
 493             if (p == NULL)
 494                 return NULL;
 495             break;
 496         }
 497
 498         p = skip_cfws(p);
 499     }
 500     return p;
 501 }
 502
 503
 504 /*
 505 **  Parse a legacy time zone.  This uses the parsing rules in RFC 2822,
 506 **  including assigning an offset of 0 to all single-character military time
 507 **  zones due to their ambiguity in practice.  Returns the new position in the
 508 **  parse stream or NULL if we failed to parse the zone.
 509 */
 510 static const char *
 511 parse_legacy_timezone(const char *p, long *offset)
 512 {
 513     const char *end;
 514     size_t max, i;
 515
 516     for (end = p; *end != '\0' && !CTYPE(isspace, *end); end++)
 517         ;
 518     if (end == p)
 519         return NULL;
 520     max = end - p;
 521     for (i = 0; i < ARRAY_SIZE(ZONE_OFFSET); i++)
 522         if (strncasecmp(ZONE_OFFSET[i].name, p, max) == 0) {
 523             p += strlen(ZONE_OFFSET[i].name);
 524             *offset = ZONE_OFFSET[i].offset;
 525             return p;
 526         }
 527     if (max == 1 && CTYPE(isalpha, *p) && *p != 'J' && *p != 'j') {
 528         *offset = 0;
 529         return p + 1;
 530     }
 531     return NULL;
 532 }
 533
 534
 535 /*
 536 **  Parse an RFC 2822 date, accepting the normal and obsolete syntax.  Takes a
 537 **  pointer to the beginning of the date and the length.  Returns the
 538 **  translated time in seconds since epoch, or (time_t) -1 on error.
 539 */
 540 time_t
 541 parsedate_rfc2822(const char *date)
 542 {
 543     const char *p;
 544     int zone_sign;
 545     long zone_offset;
 546     struct tm tm;
 547     int values[8];
 548     time_t result;
 549
 550     /* The basic rules.  Note that we don't bother to check whether the day of
 551        the week is accurate or not. */
 552     static const struct rule base_rule[] = {
 553         { TYPE_LOOKUP, 0,   WEEKDAY, 7,  3, 3 },
 554         { TYPE_DELIM,  ',', NULL,    0,  1, 1 },
 555         { TYPE_NUMBER, 0,   NULL,    0,  1, 2 },
 556         { TYPE_LOOKUP, 0,   MONTH,   12, 3, 3 },
 557         { TYPE_NUMBER, 0,   NULL,    0,  2, 4 },
 558         { TYPE_NUMBER, 0,   NULL,    0,  2, 2 },
 559         { TYPE_DELIM,  ':', NULL,    0,  1, 1 },
 560         { TYPE_NUMBER, 0,   NULL,    0,  2, 2 }
 561     };
 562
 563     /* Optional seconds at the end of the time. */
 564     static const struct rule seconds_rule[] = {
 565         { TYPE_DELIM,  ':', NULL,    0,  1, 1 },
 566         { TYPE_NUMBER, 0,   NULL,    0,  2, 2 }
 567     };
 568
 569     /* Numeric time zone. */
 570     static const struct rule zone_rule[] = {
 571         { TYPE_NUMBER, 0,   NULL,    0,  4, 4 }
 572     };
 573
 574     /* Start with a clean slate. */
 575     memset(&tm, 0, sizeof(struct tm));
 576     memset(values, 0, sizeof(values));
 577
 578     /* Parse the base part of the date.  The initial day of the week is
 579        optional. */
 580     p = skip_cfws(date);
 581     if (CTYPE(isalpha, *p))
 582         p = parse_by_rule(p, base_rule, ARRAY_SIZE(base_rule), values);
 583     else
 584         p = parse_by_rule(p, base_rule + 2, ARRAY_SIZE(base_rule) - 2,
 585                           values + 2);
 586     if (p == NULL)
 587         return (time_t) -1;
 588
 589     /* Stash the results into a struct tm.  Values are associated with the
 590        rule number of the same index. */
 591     tm.tm_mday = values[2];
 592     tm.tm_mon = values[3];
 593     tm.tm_year = values[4];
 594     tm.tm_hour = values[5];
 595     tm.tm_min = values[7];
 596
 597     /* Parse seconds if they're present. */
 598     if (*p == ':') {
 599         p = parse_by_rule(p, seconds_rule, ARRAY_SIZE(seconds_rule), values);
 600         if (p == NULL)
 601             return (time_t) -1;
 602         tm.tm_sec = values[1];
 603     }
 604
 605     /* Time zone.  Unfortunately this is weird enough that we can't use nice
 606        parsing rules for it. */
 607     if (*p == '-' || *p == '+') {
 608         zone_sign = (*p == '+') ? 1 : -1;
 609         p = parse_by_rule(p + 1, zone_rule, ARRAY_SIZE(zone_rule), values);
 610         if (p == NULL)
 611             return (time_t) -1;
 612         zone_offset = ((values[0] / 100) * 60 + values[0] % 100) * 60;
 613         zone_offset *= zone_sign;
 614     } else {
 615         p = parse_legacy_timezone(p, &zone_offset);
 616         if (p == NULL)
 617             return (time_t) -1;
 618     }
 619
 620     /* Fix up the year, using the RFC 2822 rules.  Remember that tm_year
 621        stores the year - 1900. */
 622     if (tm.tm_year < 50)
 623         tm.tm_year += 100;
 624     else if (tm.tm_year >= 1000)
 625         tm.tm_year -= 1900;
 626
 627     /* Done parsing.  Make sure there's nothing left but CFWS and range-check
 628        our results and then convert the struct tm to seconds since epoch and
 629        then apply the time zone offset. */
 630     p = skip_cfws(p);
 631     if (*p != '\0')
 632         return (time_t) -1;
 633     if (!valid_tm(&tm))
 634         return (time_t) -1;
 635     result = mktime_utc(&tm);
 636     return (result == (time_t) -1) ? result : result - zone_offset;
 637 }