#!/usr/bin/perl
#
# Script to retrieve a single article from a news server, either by
# message-id or by group name and article number.
#
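# Usage:     nntpid <messageid>
#    or:     nntpid messageid               (angle brackets optional)
#    or:     nntpid news.group.name 1234    (group+number form)
#    or:     nntpid news.group.name:1234    (group:number form)
#    or:     nntpid                         (read article ids from standard input)
#    or:     nntpid -a news.group.name      (download all available articles)
#    or:     nntpid -b news.group.name      (print the group's article number bounds)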
#
# The name of your news server is obtained from the environment variable
# NNTPSERVER, or from the file /etc/nntpserver if that's not set.
#
# This script supports AUTHINFO GENERIC authentication using the
# environment variable NNTPAUTH. It will only attempt this if it receives
# a 480 response from the news server; if your news server isn't paranoid
# then the script will never need to look at NNTPAUTH.

# Copyright 2000,2004,2011 Simon Tatham. All rights reserved.

require 5.002;
use Socket;
use FileHandle;

# Help text printed in response to --help. The synopsis lines at the top
# list every calling convention the option parser accepts, including the
# -b form; the lines after "where:" describe the individual arguments
# and options.
$usage =
  "usage: nntpid [ -v ] [ -d ] <article> [<article>...]           display articles\n" .
  "   or: nntpid [ -v ] [ -d ]           display articles read from standard input\n" .
  "   or: nntpid [ -v ] -a <newsgroup>             dump a newsgroup in mbox format\n" .
  "   or: nntpid [ -v ] -b <newsgroup>    show a newsgroup's article number bounds\n" .
  "where: <article>           a news article specified in one of several ways:\n" .
  "                            - Message-Id in angle brackets\n" .
  "                            - bare Message-Id without angle brackets\n" .
  "                            - newsgroup and article number in separate words\n" .
  "                            - newsgroup and article number separated by :\n" .
  "       -v                  verbose (print interaction with news server)\n" .
  "       -d                  direct output (don't consider using PAGER)\n" .
  "       -a                  dump all articles in group to stdout as mbox\n" .
  "       -b                  return current bounds on group's article numbers\n" .
  " also: nntpid --version    report version number\n" .
  "       nntpid --help       display this help text\n" .
  "       nntpid --licence    display (MIT) licence text\n";

# Text printed by --licence / --license: the copyright line followed by
# the MIT licence. The lines are joined with newlines, and the trailing
# empty element supplies the final newline.
$licence = join "\n",
  'nntpid is copyright 2000,2004,2011 Simon Tatham.',
  '',
  'Permission is hereby granted, free of charge, to any person',
  'obtaining a copy of this software and associated documentation files',
  '(the "Software"), to deal in the Software without restriction,',
  'including without limitation the rights to use, copy, modify, merge,',
  'publish, distribute, sublicense, and/or sell copies of the Software,',
  'and to permit persons to whom the Software is furnished to do so,',
  'subject to the following conditions:',
  '',
  'The above copyright notice and this permission notice shall be',
  'included in all copies or substantial portions of the Software.',
  '',
  'THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,',
  'EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF',
  'MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND',
  'NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS',
  'BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN',
  'ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN',
  'CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE',
  'SOFTWARE.',
  '';

# Defaults, adjusted by the option words parsed below.
$pager = 1;      # -d clears this: never pipe articles through a pager
$verbose = 0;    # -v sets this: echo the NNTP conversation to stderr
$mode = 'list';  # -a selects 'all', -b selects 'bounds'

# Consume leading option words from @ARGV. A bare "--" ends option
# processing, so that an article id beginning with "-" can still be
# given. Anything unrecognised is reported rather than silently
# dropped, since a dropped word would otherwise change the meaning of
# the remaining arguments.
while (@ARGV and $ARGV[0] =~ /^-(.+)$/) {
  my $opt = $1; # save the capture before anything else can clobber it
  shift @ARGV;
  last if $opt eq "-";
  if ($opt eq "v") {
    $verbose = 1;
  } elsif ($opt eq "d") {
    $pager = 0;
  } elsif ($opt eq "a") {
    $mode = 'all';
  } elsif ($opt eq "b") {
    $mode = 'bounds';
  } elsif ($opt eq "-help") {
    print STDERR $usage;
    exit 0;
  } elsif ($opt eq "-version") {
    print "nntpid, version 20260326.66a3363\n";
    exit 0;
  } elsif ($opt eq "-licence" or $opt eq "-license") {
    print $licence;
    exit 0;
  } else {
    die "nntpid: unrecognised option '-$opt' (try 'nntpid --help')\n";
  }
}

# Interpret whatever is left on the command line according to $mode.
if ($mode eq 'all') {
  # -a does not take article ids: its single argument is the name of
  # the newsgroup to dump.
  die "nntpid: -a expected exactly one argument\n" unless @ARGV == 1;
  $group = $ARGV[0];
} elsif ($mode eq 'bounds') {
  # -b takes a single newsgroup name, just as -a does.
  die "nntpid: -b expected exactly one argument\n" unless @ARGV == 1;
  $group = $ARGV[0];
} elsif (!@ARGV) {
  # Nothing on the command line: article ids will be read from
  # standard input after the connection to the server is up.
  $mode = 'stdin';
} else {
  # Build @list, one entry per article, from the command-line words.
  @list = ();
  while (@ARGV) {
    my $arg = shift @ARGV;

    # A word that parses by itself is an article id in its own right.
    my ($grp, $id) = &parsearticle($arg);
    if (defined $id) {
      push @list, $arg;
      next;
    }

    # Otherwise join it to the following word with a space, which is
    # how a group name and article number given as two separate words
    # get recognised.
    my $pair = join " ", $arg, $ARGV[0];
    ($grp, $id) = &parsearticle($pair);

    # Neither reading worked, so give up.
    die "nntpid: argument '$arg': unable to parse\n" unless defined $id;

    push @list, $pair;
    shift @ARGV; # the second word has been used up as well
  }
}

# Work out which news server to talk to: the NNTPSERVER environment
# variable if it is set and non-empty, otherwise the first line of
# /etc/nntpserver. The file is read directly rather than through `cat`,
# so a missing file produces our own diagnostic instead of one from a
# subprocess followed by an obscure address-packing failure.
$ns = $ENV{'NNTPSERVER'};
if (!defined $ns or !length $ns) {
  $ns = undef;
  if (open(NNTPSERVERFILE, "< /etc/nntpserver")) {
    $ns = <NNTPSERVERFILE>;
    close NNTPSERVERFILE;
  }
  $ns = "" unless defined $ns;
  $ns =~ s/^\s+//;
  $ns =~ s/\s+$//;
}
die "nntpid: no news server specified " .
    "(set NNTPSERVER or create /etc/nntpserver)\n" unless length $ns;

# Look up the NNTP port, falling back to the well-known port number if
# the services database has no "nntp" entry.
$port = (getservbyname("nntp", "tcp"))[2];
$port = 119 unless defined $port;

# From here on $ns holds the packed IP address, not the host name.
my $nsname = $ns;
$ns = inet_aton($nsname);
die "nntpid: unable to resolve news server '$nsname'\n" unless defined $ns;

# $proto and $paddr are used by connect() to create and connect the socket.
$proto = getprotobyname("tcp");
$paddr = sockaddr_in($port, $ns);
# Main program: connect to the server, then do whatever $mode asks for.
&connect;
if ($mode eq 'all') {
  # Write out the entire contents of a newsgroup in mbox format.
  # docmd returns the GROUP response text after the status code; split
  # on spaces, elements 1 and 2 are used as the first and last article
  # numbers.
  $numbers = &docmd("GROUP $group");
  @numbers = split / /, $numbers;
  $fatal = 0; # ignore failure to retrieve any given article
  for ($mid = $numbers[1]; $mid <= $numbers[2]; $mid++) {
    $art = &getart("$group:$mid");
    if (defined $art) {
      # mbox quoting: prefix any body line that looks like a "From "
      # separator (however many times already quoted) with another ">".
      $art =~ s/\n(>*From )/\n>$1/gs;
      print "From nntpid ".(localtime)."\n".$art."\n";
    }
  }
} elsif ($mode eq 'bounds') {
  # Write out the bounds of the group's article numbers.
  $numbers = &docmd("GROUP $group");
  @numbers = split / /, $numbers;
  print "$numbers[1] $numbers[2]\n";
} elsif ($mode eq 'stdin') {
  # Read article ids from standard input, one per line. This mode is
  # only selected when @ARGV is empty, so reading STDIN explicitly is
  # equivalent to the <<>> operator, and unlike <<>> it still parses on
  # the old perls that the 'require 5.002' line claims to support.
  while (<STDIN>) {
    chomp;
    s/^\s+//; s/\s+$//; # trim whitespace
    next unless length; # a blank line is not an article id; skip it
    &displayarticle($_);
  }
} elsif ($mode eq 'list') {
  # Display each article named on the command line, in order.
  for $item (@list) {
    &displayarticle($item);
  }
}

sub parsearticle {
  # Normalise an article identifier as supplied by the user.
  #
  # Takes one string and returns a two-element list:
  #   (undef, message-id)       for anything containing an @ sign
  #   (group, article number)   for "group number" or "group:number"
  #   (undef, undef)            if the string matches neither form
  my ($spec) = @_;

  # Message-ID form. If angle brackets are present they are dropped,
  # along with any text outside them.
  return (undef, $2) if $spec =~ /^(.*<)?([^<>]*\@[^<>]*)(>.*)?$/;

  # Group name, then whitespace or a colon, then a decimal article
  # number.
  return ($1, $3) if $spec =~ /^(\S+)(\s+|:)(\d+)$/;

  # Nothing recognisable.
  return (undef, undef);
}

sub displayarticle {
  # Fetch one article and show it to the user.
  #
  # The single argument is an article identifier, passed unchanged to
  # getart. If getart returns undef, nothing is displayed. Otherwise
  # the article goes through a pager when paging is enabled ($pager)
  # and standard output is a terminal, and straight to standard output
  # in every other case, including when the pager cannot be started.
  my $mid = shift @_;

  &connect();

  my $art = &getart($mid);
  return unless defined $art;

  if ($pager and -t STDOUT) {
    # Close the NNTP connection before invoking the pager, in case the
    # user spends so long looking at the article that the server times
    # us out.
    &disconnect();

    # An empty PAGER is treated like an unset one: it would otherwise
    # give the useless command "| ".
    my $pagername = $ENV{"PAGER"};
    $pagername = "more" unless defined $pagername and length $pagername;

    if (open PAGER, "| $pagername") {
      print PAGER $art;
      close PAGER;
    } else {
      # Don't lose the article just because the pager wouldn't start.
      warn "nntpid: unable to run pager '$pagername': $!\n";
      print $art;
    }
  } else {
    print $art;
  }
}

sub getart {
  # Retrieve one article from the server.
  #
  # The argument is an article identifier in any form that parsearticle
  # understands. Returns the article text as a single string with
  # "\n" line endings, or undef if the identifier cannot be parsed or
  # the ARTICLE command returned undef.
  my ($art) = @_;
  my ($group, $mid) = &parsearticle($art);

  unless (defined $mid) {
    warn "unable to parse '$art'\n";
    return undef;
  }

  if (defined $group) {
    # Group name plus article number: select the group, then ask for
    # the article by number.
    &docmd("GROUP $group");
    $ret = &docmd("ARTICLE $mid");
  } else {
    # Message-Id. Some NNTP servers refuse 'ARTICLE <message-id>'
    # until a GROUP command has been seen, so issue one if none has
    # been noted on this connection yet.
    $in_a_group or &docmd("GROUP misc.misc");
    $ret = &docmd("ARTICLE <$mid>");
  }

  defined $ret or return undef;
  $in_a_group = 1;

  # Collect the multi-line response up to the terminating "." line,
  # removing the extra leading dot from dot-stuffed lines.
  my @body;
  for (;;) {
    &getline();
    s/[\r\n]//g;
    last if /^\.$/;
    s/^\.//;
    push @body, "$_\n";
  }
  return join "", @body;
}

sub putline {
  # Send one line to the server on socket S, terminated with CRLF. In
  # verbose mode the line is first echoed to stderr with a ">>> "
  # prefix.
  my $line = shift @_;
  if ($verbose) {
    print STDERR ">>> $line\n";
  }
  print S "$line\r\n";
}

sub getline {
  # Read one line from the server on socket S.
  #
  # Side effects: $_ is set to the line with its trailing CR/LF
  # removed, and $code to its first three characters (the status code,
  # when the line is a response line). In verbose mode the line is
  # echoed to stderr with a "<<< " prefix.
  #
  # Returns the text following the status code and its separator, or
  # the empty string if the line is too short to have any.
  #
  # Dies if the server has closed the connection. Without this check
  # an undefined read would be turned into an empty line by the
  # substitution below, and a caller looping until it sees the "."
  # terminator of a multi-line response would never stop.
  $_ = <S>;
  die "nntpid: connection to news server closed unexpectedly\n"
    unless defined $_;
  s/[\r\n]*$//s;
  $code = substr($_,0,3);
  print STDERR "<<< $_\n" if $verbose;
  # A bare status code with no trailing text must not come back as
  # undef, which callers treat as a failed command.
  return length($_) > 4 ? substr($_,4) : "";
}

sub connect {
  # Open the NNTP connection on socket S, unless one is already open.
  #
  # Uses $proto and $paddr to create and connect the socket, checks
  # for a 2xx greeting, sends MODE READER, and finally records the new
  # state in $connected and $in_a_group. Also sets $fatal, so that
  # failed commands abort the program unless a caller relaxes that
  # afterwards. Dies if any step fails.
  return if $connected;

  die "socket: $!" unless socket(S,PF_INET,SOCK_STREAM,$proto);
  die "connect: $!" unless connect(S,$paddr);

  # Commands must reach the server as soon as they are written.
  S->autoflush(1);

  $fatal = 1; # most errors need to be fatal

  # The server speaks first; anything but a 2xx greeting is a failure.
  &getline();
  die "no initial greeting from server\n" unless $code =~ /^2\d\d/;

  &docmd("MODE READER");

  $in_a_group = 0;
  $connected = 1;
}

sub disconnect {
  # Shut the NNTP connection down: send QUIT, close socket S, and
  # clear $connected so that the next call to connect opens a fresh
  # connection.
  docmd("QUIT");
  close(S);
  $connected = 0;
}

sub docmd {
  # Send a command to the server and read its one-line response.
  #
  # Returns the response text after the status code when the status is
  # 2xx. For any other status it dies if $fatal is set and returns
  # undef otherwise. The response text is also left in $line.
  my $cmd = shift @_;

  # The command is sent at most twice. A 480 response triggers an
  # authentication attempt followed by one retry; a second 480 means
  # the authentication didn't help, so there is no point in going
  # round again.
  my $attempts_left = 2;
  while ($attempts_left-- > 0) {
    &putline($cmd);
    $line = &getline();
    last unless $code eq "480";
    &auth();
  }

  unless ($code =~ /^2\d\d/) {
    return undef unless $fatal;
    die "failed on `$cmd':\n$_\n";
  }
  return $line;
}

sub auth {
  # Authenticate to the server with AUTHINFO GENERIC.
  #
  # Does nothing unless the NNTPAUTH environment variable is set to a
  # true value. Otherwise its value is sent to the server in an
  # AUTHINFO GENERIC command and also run as a helper program, with
  # NNTP_AUTH_FDS set to "0.1". Each line the server sends is passed
  # to the helper's standard input, and each line the helper writes to
  # its standard output is passed to the server, until the helper
  # closes its output.
  #
  # Dies if the pipes or the child process cannot be created, or if
  # the helper does not exit with status zero.
  if ($ENV{"NNTPAUTH"}) {
    $auth = $ENV{"NNTPAUTH"};
    &putline("AUTHINFO GENERIC $auth");
    pipe AUTHSTDIN, TOAUTH or die "unable to create pipes";
    pipe FROMAUTH, AUTHSTDOUT or die "unable to create pipes";
    $pid = fork;
    if (!defined $pid) {
      die "unable to fork for authentication helper";
    } elsif ($pid == 0) {
      # we are child
      $ENV{"NNTP_AUTH_FDS"} = "0.1";
      open STDIN, "<&AUTHSTDIN"
        or die "nntpid: unable to redirect helper input: $!\n";
      open STDOUT, ">&AUTHSTDOUT"
        or die "nntpid: unable to redirect helper output: $!\n";
      close S;
      # exec only returns on failure. The child must then exit at
      # once; otherwise it would fall through and run the parent's
      # half of this function as a second copy of the program.
      exec $auth
        or print STDERR "nntpid: unable to run authentication helper: $!\n";
      exit 127;
    }
    # we are parent
    close AUTHSTDIN;
    close AUTHSTDOUT;
    autoflush TOAUTH 1;
    # If the helper exits early, writing to it should fail quietly so
    # that we get as far as reporting its exit status, rather than
    # being killed by SIGPIPE.
    local $SIG{PIPE} = 'IGNORE';
    &getline; print TOAUTH "$_\n";
    while (<FROMAUTH>) {
      s/[\r\n]*$//s;
      &putline($_);
      &getline;
      print TOAUTH "$_\n";
    }
    # Reap the helper: $? only describes its exit status once it has
    # been waited for.
    close TOAUTH;
    close FROMAUTH;
    waitpid($pid, 0);
    die "failed authentication\n" unless $? == 0;
  }
}
