From: Ian Jackson Date: Sun, 14 Aug 2016 16:55:44 +0000 (+0100) Subject: dgit: New fetch algorithm - try to tolerate in-archive copies X-Git-Tag: archive/debian/2.0~202 X-Git-Url: http://www.chiark.greenend.org.uk/ucgi/~ianmdlvl/git?p=dgit.git;a=commitdiff_plain;h=c226efe2ca372f479fc70f88a90e9bc0a9fab7b3 dgit: New fetch algorithm - try to tolerate in-archive copies Big comment explains. Amongst other things: * Introduce the mergeinfo concept * Abolish fetchspec in favour of lrfetchref * Synthesize a pseudo-merge linking the archive's .dsc with the dgit server's git branch, as appropriate. * Introduce mergeinfo_version. I have tried to ensure that where the import done by previous versions of dgit would be correct, the new import is the same. Signed-off-by: Ian Jackson --- diff --git a/dgit b/dgit index 53ccd724..f038da35 100755 --- a/dgit +++ b/dgit @@ -155,6 +155,7 @@ sub lrref () { return "refs/remotes/$remotename/".server_branch($csuite); } sub rrref () { return server_ref($csuite); } sub lrfetchrefs () { return "refs/dgit-fetch/$csuite"; } +sub lrfetchref () { return lrfetchrefs.'/'.server_branch($csuite); } sub stripepoch ($) { my ($vsn) = @_; @@ -196,11 +197,6 @@ sub no_such_package () { exit 4; } -sub fetchspec () { - local $csuite = '*'; - return "+".rrref().":".lrref(); -} - sub changedir ($) { my ($newdir) = @_; printdebug "CD $newdir\n"; @@ -1220,9 +1216,11 @@ sub get_archive_dsc () { my $fmt = getfield $dsc, 'Format'; fail "unsupported source format $fmt, sorry" unless $format_ok{$fmt}; $dsc_checked = !!$digester; + printdebug "get_archive_dsc: Version ".(getfield $dsc, 'Version')."\n"; return; } $dsc = undef; + printdebug "get_archive_dsc: nothing in archive, returning undef\n"; } sub check_for_git (); @@ -1292,7 +1290,7 @@ sub create_remote_git_repo () { } } -our ($dsc_hash,$lastpush_hash); +our ($dsc_hash,$lastpush_mergeinput); our $ud = '.git/dgit/unpack'; @@ -1471,7 +1469,8 @@ sub check_for_vendor_patches () { "distro being accessed"); } -sub 
generate_commit_from_dsc () { +sub generate_commits_from_dsc () { + # See big comment in fetch_from_archive, below. prep_ud(); changedir $ud; @@ -1520,10 +1519,16 @@ $changes # imported from the archive END close C or die $!; - my $outputhash = make_commit qw(../commit.tmp); + my $rawimport_hash = make_commit qw(../commit.tmp); my $cversion = getfield $clogp, 'Version'; + my $rawimport_mergeinput = { + Commit => $rawimport_hash, + Info => "Import of source package", + }; + my @output = ($rawimport_mergeinput); progress "synthesised git commit from .dsc $cversion"; - if ($lastpush_hash) { + if ($lastpush_mergeinput) { + my $lastpush_hash = $lastpush_mergeinput->{Commit}; runcmd @git, qw(reset -q --hard), $lastpush_hash; runcmd qw(sh -ec), 'dpkg-parsechangelog >>../changelogold.tmp'; my $oldclogp = parsecontrol('../changelogold.tmp','previous changelog'); @@ -1531,18 +1536,10 @@ END my $vcmp = version_compare($oversion, $cversion); if ($vcmp < 0) { - # git upload/ is earlier vsn than archive, use archive - open C, ">../commit2.tmp" or die $!; - print C < < 1 }); Record $package ($cversion) in archive suite $csuite END - $outputhash = make_commit qw(../commit2.tmp); } elsif ($vcmp > 0) { print STDERR <{Clogp} exists and returns it + return $mi->{Clogp} if $mi->{Clogp}; + my $mclog = ".git/dgit/clog-$mi->{Commit}"; + mkpath '.git/dgit'; + runcmd shell_cmd "exec >$mclog", @git, qw(cat-file blob), + "$mi->{Commit}:debian/changelog"; + $mi->{Clogp} = parsechangelog("-l$mclog"); +} + +sub mergeinfo_version ($) { + return getfield( (mergeinfo_getclogp $_[0]), 'Version' ); +} + sub fetch_from_archive () { # ensures that lrref() is what is actually in the archive, # one way or another @@ -1660,17 +1667,108 @@ sub fetch_from_archive () { progress "no version available from the archive"; } - $lastpush_hash = git_get_ref(lrref()); + # If the archive's .dsc has a Dgit field, there are three + # relevant git commitids we need to choose between and/or merge + # together: + # 1. 
$dsc_hash: the Dgit field from the archive + # 2. $lastpush_hash: the suite branch on the dgit git server + # 3. $lastfetch_hash: our local tracking branch for the suite + # + # These may all be distinct and need not be in any fast forward + # relationship: + # + # If the dsc was pushed to this suite, then the server suite + # branch will have been updated; but it might have been pushed to + # a different suite and copied by the archive. Conversely a more + # recent version may have been pushed with dgit but not appeared + # in the archive (yet). + # + # $lastfetch_hash may be awkward because archive imports + # (particularly, imports of Dgit-less .dscs) are performed only as + # needed on individual clients, so different clients may perform a + # different subset of them - and these imports are only made + # public during push. So $lastfetch_hash may represent a set of + # imports different to a subsequent upload by a different dgit + # client. + # + # Our approach is as follows: + # + # As between $dsc_hash and $lastpush_hash: if $lastpush_hash is a + # descendant of $dsc_hash, then it was pushed by a dgit user who + # had based their work on $dsc_hash, so we should prefer it. + # Otherwise, $dsc_hash was installed into this suite in the + # archive other than by a dgit push, and (necessarily) after the + # last dgit push into that suite (since a dgit push would have + # been descended from the dgit server git branch); thus, in that + # case, we prefer the archive's version (and produce a + # pseudo-merge to overwrite the dgit server git branch). + # + # (If there is no Dgit field in the archive's .dsc then + # generate_commits_from_dsc uses the version numbers to decide + # whether the suite branch or the archive is newer. If the suite + # branch is newer it ignores the archive's .dsc; otherwise it + # generates an import of the .dsc, and produces a pseudo-merge to + # overwrite the suite branch with the archive contents.)
+ # + # The outcome of that part of the algorithm is the `public view', + # and is the same for all dgit clients: it does not depend on any + # unpublished history in the local tracking branch. + # + # As between the public view and the local tracking branch: The + # local tracking branch is only updated by dgit fetch, and + # whenever dgit fetch runs it includes the public view in the + # local tracking branch. Therefore if the public view is not + # descended from the local tracking branch, the local tracking + # branch must contain history which was imported from the archive + # but never pushed; and, its tip is now out of date. So, we make + # a pseudo-merge to overwrite the old imports and stitch the old + # history in. + # + # Finally: we do not necessarily reify the public view (as + # described above). This is so that we do not end up stacking two + # pseudo-merges. So what we actually do is figure out the inputs + # to any public view pseudo-merge and put them in @mergeinputs. + + my @mergeinputs; + # $mergeinputs[]{Commit} + # $mergeinputs[]{Info} + # $mergeinputs[0] is the one whose tree we use + # @mergeinputs is in the order we use in the actual commit + # + # Also: + # $mergeinputs[]{Message} is a commit message to use + # $mergeinputs[]{ReverseParents} if def specifies that parent + # list should be in opposite order + # Such an entry has no Commit or Info. It applies only when found + # in the last entry. (This ugliness is to support making + # identical imports to previous dgit versions.)
+ + my $lastpush_hash = git_get_ref(lrfetchref()); printdebug "previous reference hash=$lastpush_hash\n"; - my $hash; + $lastpush_mergeinput = $lastpush_hash && { + Commit => $lastpush_hash, + Info => "dgit suite branch on dgit git server", + }; + + my $lastfetch_hash = git_get_ref(lrref()); + printdebug "fetch_from_archive: lastfetch=$lastfetch_hash\n"; + my $lastfetch_mergeinput = $lastfetch_hash && { + Commit => $lastfetch_hash, + Info => "dgit client's archive history view", + }; + + my $dsc_mergeinput = $dsc_hash && { + Commit => $dsc_hash, + Info => "Dgit field in .dsc from archive", + }; + if (defined $dsc_hash) { fail "missing remote git history even though dsc has hash -". - " could not find ref ".lrref(). - " (should have been fetched from ".access_giturl()."#".rrref().")" + " could not find ref ".rref()." at ".access_giturl() unless $lastpush_hash; - $hash = $dsc_hash; ensure_we_have_orig(); if ($dsc_hash eq $lastpush_hash) { + @mergeinputs = $dsc_mergeinput } elsif (is_fast_fwd($dsc_hash,$lastpush_hash)) { print STDERR <{Commit}; + $h and is_fast_fwd($lastfetch_hash, $h); + # If true, one of the existing parents of this commit + # is a descendant of the $lastfetch_hash, so we'll + # be ff from that automatically. 
+ } @mergeinputs + ) { + # Otherwise: + push @mergeinputs, $lastfetch_mergeinput; + } + + printdebug "fetch mergeinfos:\n"; + foreach my $mi (@mergeinputs) { + if ($mi->{Info}) { + printdebug " commit $mi->{Commit} $mi->{Info}\n"; + } else { + printdebug sprintf " ReverseParents=%d Message=%s", + $mi->{ReverseParents}, $mi->{Message}; + } + } + + my $compat_info= pop @mergeinputs + if $mergeinputs[$#mergeinputs]{Message}; + + @mergeinputs = grep { defined $_->{Commit} } @mergeinputs; + + my $hash; + if (@mergeinputs > 1) { + # here we go, then: + my $tree_commit = $mergeinputs[0]{Commit}; + + my $tree = cmdoutput @git, qw(cat-file commit), $tree_commit; + $tree =~ m/\n\n/; $tree = $`; + $tree =~ m/^tree (\w+)$/m or die "$dsc_hash tree ?"; + $tree = $1; + + # We use the changelog author of the package in question as the + # author of this pseudo-merge. This is (roughly) correct if + # this commit is simply representing a non-dgit upload. + # (Roughly because it does not record sponsorship - but we + # don't have sponsorship info because that's in the .changes, + # which isn't in the archive.) + # + # But, it might be that we are representing archive history + # updates (including in-archive copies). These are not really + # the responsibility of the person who created the .dsc, but + # there is no-one whose name we should better use. (The + # author of the .dsc-named commit is clearly worse.)
+ + my $useclogp = mergeinfo_getclogp $mergeinputs[0]; + my $author = clogp_authline $useclogp; + my $cversion = getfield $useclogp, 'Version'; + + my $mcf = ".git/dgit/mergecommit"; + open MC, ">", $mcf or die "$mcf $!"; + print MC <{Commit} } @mergeinputs; + @parents = reverse @parents if $compat_info->{ReverseParents}; + print MC <{Commit} +END + + print MC <{Message}) { + print MC $compat_info->{Message} or die $!; + } else { + print MC <{Info} + or die $!; + }; + + $message_add_info->($mergeinputs[0]); + print MC <($_) foreach @mergeinputs[1..$#mergeinputs]; + } + + close MC or die $!; + $hash = make_commit $mcf; + } else { + $hash = $mergeinputs[0]{Commit}; } + progress "fetch hash=$hash\n"; + + my $chkff = sub { + my ($lasth, $what) = @_; + return unless $lasth; + die "$lasth $hash $what ?" unless is_fast_fwd($lasth, $hash); + }; + + $chkff->($lastpush_hash, 'dgit repo server tip (last push)'); + $chkff->($lastfetch_hash, 'local tracking tip (last fetch)'); + + runcmd @git, qw(update-ref -m), "dgit fetch $csuite", + 'DGIT_ARCHIVE', $hash; + cmdoutput @git, qw(log -n2), $hash; + # ... gives git a chance to complain if our commit is malformed + if (defined $skew_warning_vsn) { mkpath '.git/dgit'; printdebug "SKEW CHECK WANT $skew_warning_vsn\n"; @@ -1732,7 +1960,8 @@ We were able to obtain only $got_vsn END } } - if ($lastpush_hash ne $hash) { + + if ($lastfetch_hash ne $hash) { my @upd_cmd = (@git, qw(update-ref -m), 'dgit fetch', lrref(), $hash); if (act_local()) { cmdoutput @upd_cmd; @@ -1808,7 +2037,6 @@ sub clone ($) { runcmd @git, qw(init -q); my $giturl = access_giturl(1); if (defined $giturl) { - set_local_git_config "remote.$remotename.fetch", fetchspec(); open H, "> .git/HEAD" or die $!; print H "ref: ".lref()."\n" or die $!; close H or die $!;