#!/usr/local/bin/perl

# Copyright (c) 2004-2013 Matthew Seaman. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#
#    1.  Redistributions of source code must retain the above
#        copyright notice, this list of conditions and the following
#        disclaimer.
#
#    2.  Redistributions in binary form must reproduce the above
#        copyright notice, this list of conditions and the following
#        disclaimer in the documentation and/or other materials
#        provided with the distribution.
#
# THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS''
# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
# TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
# PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
# USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
# ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
# OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
# SUCH DAMAGE.

# @(#) $Id$
#

# Generate an incremental update to the cached 'make describe' output,
# for the listed port origins.  Requires previously built cache of the
# whole ports tree, including a table showing which makefiles and
# pkg-descr files are used by which ports, together with the last
# modification time of those files. This is the basis of the method
# for automatically determining which ports should be checked for
# changes since the last cache update.
#
# Running the make_describe method on a candidate port will chdir to
# that path and run 'make -V ...' in the given directory, from which
# it extracts the values of a number of make variables.  Internal
# processing of that output generates the equivalent of running 'make
# describe' (but we have saved executing a whole perl process for each
# port) and that is converted into a FreeBSD::Portindex::Port or a
# FreeBSD::Portindex::Category object, which is frozen (serialized)
# and saved in the ports cache.  It extracts the values of a couple of
# other make variables: .MAKEFILE_LIST and SUBDIRS. That output is
# recorded in the cached object as well (SUBDIRS only for Category
# objects).  Failing to run make successfully results in that port
# data being removed from the cache -- which is how deletions and port
# moves are handled.
#
# If an included Makefile is updated -- all ports including it are
# added to the list of update candidates.  Exception: if it should
# happen that /usr/ports/Mk/bsd.port.mk is updated, that means every
# port should be checked, in which case it would be better to
# reinitialise the cache from scratch.  This method also detects
# changes to OPTIONS settings by the timestamp on files under
# /var/db/ports/ -- In addition, /var/db/ports is scanned for new
# files, where options have recently been set.

use strict;
use warnings;

use FreeBSD::Portindex::Config
  qw(%Config read_config update_timestamp get_timestamp scrub_environment);
use FreeBSD::Portindex::ListVal;
use FreeBSD::Portindex::Makefile;
use FreeBSD::Portindex::Port;
use FreeBSD::Portindex::Tree;

$0 =~ s@.*/@@;    # Script name for error messages

# Read a plain list of absolute paths, one per line, from the given
# file handle and offer each of them to the tree as an update
# candidate.  The output of 'portsnap update' is accepted as well.
#
#   $FH      -- file handle to read from
#   $updates -- collection of update candidates, handed on to
#               $tree->add_to_updates_if_modified()
#   $tree    -- object providing add_to_updates_if_modified()
#
# Lines that, once trailing whitespace has been removed, are not a
# single whitespace-free word starting with '/' are ignored.
# Returns $updates.
sub read_plain (*$$)
{
    my ( $FH, $updates, $tree ) = @_;

    while ( defined( my $line = <$FH> ) ) {

        # Strip all trailing whitespace, not just the newline:
        # portsnap terminates its lines with \r\n.
        $line =~ s/\s+\Z//;

        if ( $line =~ m@^/\S+\Z@ ) {

            # portsnap reports an updated port as its origin directory
            # with a trailing slash (/usr/ports/category/port/).  An
            # actual file is needed, so point at the Makefile within
            # that directory instead.
            $line =~ s@/\Z@/Makefile@;

            $tree->add_to_updates_if_modified( $updates, $line );
        }
    }

    return $updates;
}

# Process the output from svn(1) generated by running 'svn up'.  This
# is mostly a list of the files modified: each such line starts with
# up to three status indicator letters, followed by whitespace and
# the path.  Lines that do not have that layout are ignored.
# $Config{PortsDir} is always prepended to the path before it is
# offered to the tree as an update candidate.
#
#   $FH      -- file handle to read the 'svn up' output from
#   $updates -- collection of update candidates, handed on to
#               $tree->add_to_updates_if_modified()
#   $tree    -- object providing add_to_updates_if_modified()
#
# Returns $updates.
sub read_svn(*$$)
{
    my $FH      = shift;
    my $updates = shift;
    my $tree    = shift;

    # Read into a lexical: 'while (<$FH>)' assigns to the global $_
    # without localizing it, which would clobber the caller's $_.
    while ( my $line = <$FH> ) {
        chomp $line;

        # 'R' (replaced) is accepted alongside the other status
        # letters: a replaced file has changed just as much as an
        # updated one.
        my ($name) = ( $line =~ m@^[ABCDEGRU]{1,3}\s{1,4}(\S+)\Z@ );

        next
          unless defined $name;

        $tree->add_to_updates_if_modified( $updates,
            "$Config{PortsDir}/$name" );
    }
    return $updates;
}

# Main program.  In outline:
#
#   1. Read the configuration, optionally scrub the environment and
#      reopen STDIN on the configured input file.
#   2. Open the cache and collect the update candidates selected by
#      $Config{Format}.
#   3. Replace any category entries among the candidates by the ports
#      affected by changes to those categories.
#   4. Re-run make_describe for every remaining candidate and flush
#      the cache.
MAIN:
{
    read_config('portindex');

    scrub_environment()
      if $Config{ScrubEnvironment};

    # Reopen STDIN if required
    if ( $Config{Input} ne '-' ) {
        open STDIN, '<', $Config{Input}
          or die "$0: Can't open input $Config{Input} -- $!\n";
    }

    # tie to the stored description, etc. data

    my $tree = FreeBSD::Portindex::Tree->new(
        -Env           => { -Home => $Config{CacheDir}, },
        -CacheFilename => $Config{CacheFilename},
    );

    # Read the list of ports to re-check and update for the
    # appropriate input format.  GetOptions() will enforce correct
    # $Config{Format} syntax. 'plain' now works with the output from
    # 'portsnap update'

    my $updates = FreeBSD::Portindex::ListVal->new();

    if ( $Config{Format} =~ m/plain/ ) {
        read_plain( *STDIN, $updates, $tree );
    } elsif ( $Config{Format} =~ m/svn-up/ ) {
        read_svn( *STDIN, $updates, $tree );
    }

    # Some Makefiles affect the compilation of all ports.  When those
    # are changed, then it is a good idea to re-initialise the cache
    # from scratch.  Note: the list of Makefiles included on every
    # invocation of make(1) is longer than this, but /usr/sys/Mk/* and
    # other stuff doesn't generally affect the contents of the
    # resulting INDEX.  Run 'make -V .MAKEFILE_LIST | tr ' ' \\n' in
    # some port directories to see the full story.

    if ( $Config{Format} =~ m/other/ ) {
        $tree->check_other_makefiles($updates);
    }

    # Read PORT_DBDIR and check timestamps on all options files --
    # alert for any options files not known to be associated with any
    # port.

    if ( $Config{Format} =~ m/options/ ) {
        $tree->check_port_options($updates);
    }

    # Just compare all of the timestamps in the cache (for Makefiles
    # and pkg-descr files) against the current state of the
    # filesystem.  This is a super-set of check_other_makefiles() and
    # check_port_options().

    if ( $Config{Format} =~ m/cache/ ) {
        $tree->check_cache_makefiles($updates);
    }

    # If there's nothing to update, exit immediately.

    unless ( $updates->length() ) {
        print STDERR "$0: Nothing to do!\n"
          if $Config{Verbose};
        $tree->flush();
        exit 0;
    }

    # Mark the time at which the updates start.  This can update
    # recorded timestamps for Makefiles even if no ports get checked.

    update_timestamp();

    # If any of the entries to update consists of a category Makefile,
    # then it should be replaced by a list of the differences to the
    # SUBDIRS from that category since the last update.  After this
    # process, $updates should contain only ports, without categories.

    # Select the categories marked as 'for upgrade', moving them out
    # of $updates and into their own list.
    my $category_updates = FreeBSD::Portindex::ListVal->new();

    for my $path ( $updates->get_sorted() ) {
        if ( $tree->category_match($path) ) {
            $category_updates->insert($path);
            $updates->delete($path);
        }
    }

    # Keep making passes over the category list until a pass finds
    # it empty.  $counter only ever increases here, so it also
    # numbers the verbose messages across all passes.
    # NOTE(review): termination relies on category_check() removing
    # the entries it has dealt with from $category_updates -- confirm
    # against FreeBSD::Portindex::Tree.
    my $counter = 0;
    my $startcounter;
    do {
        $startcounter = $counter;

        for my $path ( $category_updates->get_sorted() ) {
            $counter++;
            print STDERR "$0:$counter: Checking for category changes at $path\n"
              if $Config{Verbose};
            $tree->category_check( $path, $category_updates, $updates );
        }
    } while ( $counter > $startcounter );

    # Regenerate the cached FreeBSD::Portindex::Port objects for all
    # listed ports

    $counter = 0;
    for my $path ( $updates->get_sorted() ) {
        $counter++;
        print STDERR "$0:$counter: Updating cached data for $path\n"
          if $Config{Verbose};
        $tree->make_describe("$Config{PortsDir}/$path");
    }

    $tree->flush();
    exit 0;
}

__END__

=head1 NAME

cache-update -- Incrementally update the portindex cache

=head1 SYNOPSIS

B<cache-update> [B<-hvqs>] [B<-c> F<dir>] [B<-C> F<file>] [B<-T> F<file>] [B<-p> F<dir>] [B<-d> F<dir>] [B<-f> I<format>] [B<-P> num] [B<-i> F<file>] [B<-M> F<file>]... [B<-m> F<file>]...

=head1 DESCRIPTION

B<cache-update> processes a list of port origins, regenerating the
index (C<make describe>) and F<Makefile> dependency data (C<make -V
.MAKEFILE_LIST>) or sub-directory list (C<make -V SUBDIR>) for
categories from each of them, and updating the record of that data
held in the B<portindex> cache.  If B<cache-update> is passed a port
origin that no longer exists, it will delete any corresponding record
from the cache. To handle a port that has been moved, it is necessary
to pass B<cache-update> both the old and the new locations of the port
for processing.

The list of ports to be processed by B<cache-update> may be supplied
in three formats, together with two modifiers that add scanning of
certain other files:

=over 8

=item B<cache>

Uses the cached timestamps for all F<Makefiles> and F<pkg-descr> files
within the ports tree, and various other F<Makefiles> elsewhere in the
filesystem: any file that could affect the resultant INDEX.  While
this option is processed as if it were an input format, that is
misleading.  When using B<-f cache>, no input file is read.

B<cache> cannot be combined with any of the other input format
options.

B<cache> is the default input format.

=item B<plain>

A list of port origin directories, one per line.  The output of
B<portsnap update> can be passed directly to B<cache-update> or
B<find-updated> may be used to generate a list in this format.  The
generated list of ports to check for updates includes all listed in
the input, plus all ports that are slave ports of those listed in the
input, or ports that include any Makefile listed in the input.

=item B<svn-up>

The output from running B<svn update> to update a checked-out copy of
the ports tree.

=item B<options>

B<cache-update> will search the directory tree where port options
settings are stored. It will compare the timestamps on the F<options>
files it finds with the timestamps recorded in the cache of the last
time the data for that port was updated.  If the port options have
been updated more recently than the cache data, the cache entry is
refreshed.

=item B<other>

B<cache-update> will check the last modification time on a number of
makefiles outside C<PORTSDIR> and C<PORT_DBDIR>, and if they differ
from the values stored in the cache, will cause a check for updates of
any port that includes the makefile.  

=back

The B<options> and B<other> modifiers may be combined with either of
the B<plain> or B<svn-up> formats, or with each other, but not with
B<cache>.  The following combinations are recognised in addition to
the single options above:

=over 8

=item B<plain,options>

=item B<plain,options,other>

=item B<plain,other>

=item B<svn-up,options>

=item B<svn-up,options,other>

=item B<svn-up,other>

=item B<options,other>

=back 

Where an update to a Makefile or pkg-descr is detected, all ports
where that file is included will be added to the list of ports to
reprocess.  In certain circumstances it may be more advantageous to
run B<cache-init> rather than B<cache-update>.

=head2 When to run B<cache-init> and when to run B<cache-update>

Over time, successively updating the F<INDEX> file via B<cache-update>
can produce minor inconsistencies and an F<INDEX> file that diverges
slowly from equivalence to what starting afresh would produce.  For
best results it will be necessary to occasionally re-run B<cache-init>
and rebuild the cache from scratch.  Certain changes to your system
should act as warnings that this needs to be done.

=over 8

=item *

Modifications to ubiquitously included makefiles such as
F</etc/make.conf> or F</usr/ports/Mk/bsd.port.mk> are handled
specially.  B<cache-update> will compare timestamps on these files
with the cache timestamp and attempt to warn you when they change.  If
so, cache re-initialisation might be a good idea.  It won't
automatically reinitialise, as that's an expensive operation and
frequently not necessary despite any changes to a ubiquitous
makefile.

The list of ubiquitous makefiles to test in this manner can be set for
B<cache-init> using the C<--ubiquitous-makefile> command line option
or C<UbiquitousMakefiles> configuration file setting.  These values
are built into the cached data.  If you want to change them, it will
be necessary to rerun B<cache-init>.

The C<--endemic-makefile> option or C<EndemicMakefiles> configuration
file setting is used by B<cache-init> in the same manner.  It lists
makefiles where changes should have no effect on the outcome of
generating the F<INDEX> at all.  Being marked as endemic will cause
changes to that makefile to be ignored, suppressing the warning on
changes if the makefile is also marked ubiquitous, or preventing
B<cache-update> rechecking and updating the cached data otherwise.

There are a number of additional makefiles located outside
F</usr/ports> (C<$PORTSDIR>) or F</var/db/ports> (C<$PORT_DBDIR>)
which can be included when B<make> is invoked by the ports
system. These are recorded by B<cache-init> and by default any such
makefiles will be checked for modification by B<cache-update>.  New
instances of such files may not be picked up immediately on update,
unless at least one of the ports that includes it is also modified.

=item *

Modifying the environment between successive runs of B<cache-update>.
B<make> variables can often be set from the environment, although
using F</etc/make.conf> would generally be a better idea.  There are
two things that can be done to prevent this causing problems.
Firstly, the configuration file can contain live I<Perl> code: you can
modify the environment of the processes by manipulating the global
C<%ENV> hash from within the configuration file.  Secondly both
B<cache-init> and B<cache-update> obey a C<--scrub-environment>
command line flag, and the equivalent C<ScrubEnvironment>
configuration file setting, which deletes everything from the
environment except for certain standard variables.  As command line
options generally override configuration files, C<--scrub-environment>
will trump modifying C<%ENV>.

=item *

Installing or updating certain software packages.  For instance, the
simple presence of the Gnome libraries on the system will cause many
packages to add a C<-gnome> suffix to their names.  The F<editors/vim>
port is an example of this behaviour.  Ports containing Linux software
run under emulation will automatically detect which version of the
F<linux-base> ports you have installed: changing to a different
F<linux-base> port will affect the dependency lists for all Linux
software ports.  Unfortunately it is practically impossible to detect
such changes and automatically update affected ports.  These are not
the only two examples of such behaviour.

=back

=head2 Configuration Files

B<cache-update> shares configuration files with B<cache-init>,
B<find-updated> and B<portindex>.  Any configuration settings are
taken from the following locations, where the later items on this list
override the earlier:

=over 8

=item *

Built-in settings from the B<FreeBSD::Portindex::Config> perl module.

=item *

The system wide configuration file F</usr/local/etc/portindex.cfg>

=item *

The per-user configuration file F<${HOME}/.portindexrc>. This file is
ignored if the process is run as root.

=item *

The local configuration file, found in the current working directory
of the B<cache-update> process F<./.portindexrc>.  This file is ignored
if the process is run as root.

=item *

The program command line.

=back

All of the configuration files are optional.  A summary of the
resultant configuration options including the effect of any command
line settings is printed as part of the help text when B<cache-update>
is invoked with the C<-h> option.

=head1 OPTIONS

=over 8

=item B<-h>

=item B<--help>

Print a brief usage message and a summary of the configuration
settings after command line processing and then exit.

=item B<-v>

=item B<--verbose>

Turn on verbose output printed to C<STDERR>. This is the default.

=item B<-q>

=item B<--quiet>

=item B<--noverbose>

Turn off verbose output to C<STDERR>.  Using both the B<-v> and B<-q>
options together does not make any sense, but neither does it generate
an error.  The last mentioned of the two options will prevail.

=item B<-s>

=item B<--scrub-environment>

Delete all environment variables except for C<$USER>, C<$HOME>,
C<$PATH>, C<$SHELL>, C<$TERM> and C<$TERMCAP>.  This provides a
standardized environment for C<make describe> and other sub-processes.  

=item B<--noscrub-environment>

Turn off environment scrubbing.  All environment variables will be
passed through intact to C<make describe> and other
sub-processes. This is the default.

=item B<-c> F<dir>

=item B<--cache-dir>=F<dir>

The location of the B<portindex> data cache, by default
F</var/db/portindex>.

=item B<-C> F<file>

=item B<--cache-file>=F<file>

Berkeley DB Btree file containing the cached and processed values of a
number of C<make> variables for all of the ports in the tree.  This
file name will be relative to the cache directory (B<-c> option above)
unless an absolute path is given.  Defaults to F<portindex-cache.db>.

=item B<-T> F<file>

=item B<--timestamp-file>=F<file>

A file within the cache directory whose modification time marks the
last time that data was modified in or added to the cache.  Defaults
to F<portindex-timestamp>.

=item B<-p> F<dir>

=item B<--ports-dir>=F<dir>

The location of the ports tree. Almost always defaults to
F</usr/ports> unless C<$PORTSDIR> is set in the environment.

=item B<-d> F<dir>

=item B<--port-dbdir>=F<dir>

Where the C<OPTIONS> settings for ports are stored.  Almost always
defaults to F</var/db/ports> unless C<$PORT_DBDIR> is set in the
environment.

=item B<-i> F<file>

=item B<--input>=F<file>

Filename to read in order to generate the list of ports for which the
C<make describe> data needs to be reprocessed.  F<-> means read from
STDIN, which is the default.

=item B<-f> { I<cache> | I<plain> | I<svn-up> | I<options> | I<other> |
 I<plain,options> | I<svn-up,options> | I<plain,other> |
 I<svn-up,other> | I<options,other> | I<plain,options,other> |
 I<svn-up,options,other> }

=item B<--format>={ I<plain> | ... }

Which input formats B<cache-update> should parse to determine the list
of ports to reprocess.  Default: cache

=back

=head1 FILES

=over 16

=item F</usr/ports>

The default ports directory.

=item F</var/db/portindex>

The location of the data caches.

=item F<portindex-cache.db>

Btree file containing cached C<make describe> and other output.

=item F<portindex-timestamp>

This file contains the last time and date that the cache was updated
or modified.

=item F<__db.001>, F<__db.002>, F<__db.003>, F<__db.004>

Files used as part of the internal workings of BerkeleyDB, for memory
pool management and DB locking.  Will be recreated automatically if
deleted.

=item F</usr/local/etc/portindex.cfg>

System-wide configuration file.

=item F<${HOME}/.portindexrc>

Per-user configuration file

=item F<./.portindexrc>

Local configuration file

=back

=head1 SEE ALSO

L<cache-init(1)>, L<portindex(1)>, L<find-updated(1)>, L<cvsup(1)>,
L<ports(7)>

=head1 BUGS

I<cvsup-checkouts> format mode is not completely accurate.  Choosing the
correct propagation delay is a matter of guesswork.

B<cache-update> should optionally parse the contents of
F</usr/local/etc/pkgtools.conf> and apply settings from the
C<MAKE_ENV> array.

Changes to some makefiles outside the ports tree can have significant
effects, which aren't detected.  For instance
F</usr/local/etc/php.conf>.

=cut

#
# That's All Folks!
#
