#!/usr/bin/perl -w # git-debrebase # Script helping make fast-forwarding histories while still rebasing # upstream deltas when working on Debian packaging # # Copyright (C)2017 Ian Jackson # # This program is free software: you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation, either version 3 of the License, or # (at your option) any later version. # # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. # # You should have received a copy of the GNU General Public License # along with this program. If not, see . # usages: # git-debrebase status # git-debrebase start # like ffqrebase start + debrebase launder # git-debrebase new-upstream [stuff] # see below # git-debrebase # does debrebase start if necessary # # git-debrebase analyse # git-debrebase launder # prints breakwater tip # git-debrebase create-new-upstream-breakwater [-f] ... # # is # [,][:][+][,...] # # if initial comma is supplied, entries are not positional. Unspecified # means root (and there may be only one). # xxx want auto branch names # xxx too complicated # how about for now # [+] [ [+]...] # ? plus options # --new-upstream-different-subtrees # # automatic case # git-debrebase new-upstream # - previous breakwater merge must be gdr-generated # - orig set is the same as before # - implicitly uses upstream branches according to orig set # - not all upstream branches need be updated # - insists on fast-forward of each branch, unless # --force (or --force=[/]) # branch set adjustments # git-debrebase new-upstream --add / # git-debrebase new-upstream --rm / # git-debrebase new-upstream / [/ ...] 
# - orig set is adjusted # - otherwise like auto (--add is not checked for ffness, obv) # - multiple --add and --rm may be specified # - --add makes new upstream the last contributor # explicit # git-debrebase / [] [/ [] ...] # - orig set is precisely as specified now # - previous breakwater merge is irrelevant # - no fast forward checks # for now only explicit with commitids # implicitly uses `upstream' # # (or multiple other branches) # git-debrebase new-upstream \ # [/]= # UPSTREAM[,[[SUBDIR:]SUBUPSTREAM] # default for SUBDIR: is from previous upstream merge[xxx terminology] # # #xxx # when starting must record original start (for ff) # and new rebase basis # # git-ffqrebase start [BASE] # # records previous HEAD so it can be overwritten # # records base for future git-ffqrebase # git-ffqrebase set-base BASE # git-ffqrebase # git-ffqrebase finish # git-ffqrebase status [BRANCH] # # refs/ffqrebase-prev/BRANCH BRANCH may be refs/...; if not it means # refs/ffqrebase-base/BRANCH refs/heads/BRANCH # zero, one, or both of these may exist # # git-debrebase without start, if already started, is willing # to strip pseudomerges provided that they overwrite exactly # the previous HEAD # xxxx is this right ? what matters is have we pushed # I think in fact the right answer is: # git-debrebase always strips out pseudomerges from its branch # a pseudomerge is put in at the time we want to push # at that time, we make a pseudomerge of the remote tracking # branch (if raw git) or the dgit view (if dgit) # for raw git git-ffqrebase, do want precisely to record # value of remote tracking branch or our branch, on start, so we # overwrite only things we intend to # the previous pseudomerge check for tags and remote branches ?
use strict;

use Memoize;
use Carp;
use POSIX;
use Data::Dumper;

use Debian::Dgit qw(:DEFAULT $wa);

# NOTE(review): cmdoutput, runcmd, git_cat_file, git_rev_parse,
# workarea_fresh and in_workarea are not defined in this file; they
# are presumably provided by Debian::Dgit - confirm.

# badusage $message
#   Dies with a "bad usage" message (no file/line suffix).
sub badusage ($) {
    my ($m) = @_;
    die "bad usage: $m\n";
}

# cfg $key
#   Looks up git configuration key $key using `git config -z'.
#   In list context returns all values (--get-all); in scalar
#   context returns the single value.  Results are memoized.
sub cfg ($) {
    my ($k) = @_;
    # Localised so that the NUL separator does not leak into the rest
    # of the program.  Presumably cmdoutput chomps according to $/ and
    # this strips the trailing NUL - TODO confirm.
    local $/ = "\0";
    my @cmd = qw(git config -z);
    push @cmd, qw(--get-all) if wantarray;
    push @cmd, $k;
    my $out = cmdoutput @cmd;
    my @values = split /\0/, $out;
    # A bare `split' in scalar context would yield the field count.
    return wantarray ? @values : $values[0];
}

memoize('cfg');

# get_commit $objid
#   Returns ($headers, $message) for commit $objid.  $headers keeps
#   one final newline; the blank separator line is dropped.
sub get_commit ($) {
    my ($objid) = @_;
    my ($type,$data) = git_cat_file $objid;
    die "$objid ($type) ?" unless $type eq 'commit';
    # Split at the first empty line, ie the end of the headers.
    $data =~ m/(?<=\n)\n/ or die "$objid ?";
    return ($`,$');
}

# Bit values making up the return value of get_differs.
sub D_DEB ()     { return 0x1; } # debian/ (not including debian/patches/)
sub D_UPS ()     { return 0x2; } # upstream files
sub D_PAT_ADD () { return 0x4; } # debian/patches/ extra patches at end
sub D_PAT_OTH () { return 0x8; } # debian/patches other changes

our $rd = ".git/git-debrebase";
our $ud = "$rd/work";
our @git = qw(git);

# get_differs $x, $y
#   Compares tree-ishes $x and $y and returns a bitmask of D_* values
#   saying which kinds of file differ.
sub get_differs ($$) {
    my ($x,$y) = @_;
    # This resembles quiltify_trees_differ, in dgit, a bit.  But: we
    # don't care about modes, or dpkg-source-unrepresentable changes,
    # and we don't need the plethora of different modes.

    my $differs = 0;

    # Runs git diff-tree on the two trees, restricted to the paths in
    # @$limits, and calls $fn->($path) for each path reported.
    my $rundiff = sub {
        my ($opts, $limits, $fn) = @_;
        $opts //= [qw(--name-only)];
        my @cmd = (@git, qw(diff-tree -z --no-renames));
        push @cmd, @$opts;
        push @cmd, "$_:" foreach $x, $y;
        # Path limits (rather than "$x:debian") so that this still
        # works when one side has no such subdirectory.
        push @cmd, '--', @$limits;
        my $diffs = cmdoutput @cmd;
        foreach my $f (split /\0/, $diffs) { $fn->($f); }
    };

    # Top level, not recursive: is debian/ touched, is anything else ?
    $rundiff->(undef, [], sub {
        my ($f) = @_;
        $differs |= $f eq 'debian' ? D_DEB : D_UPS;
    });

    if ($differs & D_DEB) {
        # Look inside debian/ to separate patches from the rest.
        $differs &= ~D_DEB;
        $rundiff->([qw(--name-only -r)], [qw(debian)], sub {
            my ($f) = @_;
            $differs |= $f =~ m{^debian/patches(?:/|\z)}
                ? D_PAT_OTH : D_DEB;
        });
    }

    # xxx D_PAT_ADD (only extra patches appended) is not detected yet:
    # every change below debian/patches is reported as D_PAT_OTH.

    return $differs;
}

# commit_pr_info $classification
#   Returns a Data::Dumper rendering of $classification, for debugging.
sub commit_pr_info ($) {
    my ($r) = @_;
    return Data::Dumper->Dump([$r], [qw(commit)]);
}

# calculate_committer_authline
#   Makes a throwaway commit of HEAD's tree and returns its
#   `committer ...' header line, ie the identity and timestamp that
#   git would use for a commit made now.
sub calculate_committer_authline () {
    my $c = cmdoutput @git, qw(commit-tree --no-gpg-sign -m),
        'DUMMY COMMIT (git-debrebase)', "HEAD:";
    my ($h,$m) = get_commit $c;
    $h =~ m/^committer .*$/m or confess "($h) ?";
    return $&;
}

# classify returns an info hash like this
#   CommitId => $objid
#   Hdr => # commit headers, including 1 final newline
#   Msg => # commit message (so one newline is dropped)
#   Tree => $treeobjid
#   Type => (see below)
#   Why => # only for Unknown: the reason
#   Parents => [ {
#       Ix => $index # ie 0, 1, 2, ...
#       CommitId
#       Differs => return value from get_differs
#       IsOrigin
#       IsDgitImport => 'orig' 'tarball' 'unpatched' 'package' (as from dgit)
#     } ...]
#   NewMsg => # commit message, but with any [dgit import ...] edited
#             # to say "[was: ...]"
#
# Types:
#   Packaging
#   Upstream
#   AddPatches
#   Mixed
#   Unknown
#   BreakwaterStart
#
#   Pseudomerge
#     has additional entries in classification result
#       Overwritten => [ subset of Parents ]
#       Contributor => $the_remaining_Parent
#
#   DgitImportUnpatched
#     has additional entry in classification result
#       OrigParents => [ subset of Parents ]
#
#   BreakwaterUpstreamMerge
#     has additional entry in classification result
#       OrigParents => [ subset of Parents ]

sub classify ($) {
    my ($objid) = @_;

    my ($h,$m) = get_commit $objid;

    my ($t) = $h =~ m/^tree (\w+)$/m or die $objid;
    my (@ph) = $h =~ m/^parent (\w+)$/mg; # /g: we want every parent
    my @p;

    my $r = {
        CommitId => $objid,
        Hdr => $h,
        Msg => $m,
        Tree => $t,
        Parents => \@p,
    };

    foreach my $ph (@ph) {
        push @p, {
            Ix => scalar @p, # index this entry is about to get
            CommitId => $ph,
            Differs => (get_differs $t, $ph),
        };
    }

    my $classify = sub {
        my ($type, @rest) = @_;
        $r = { %$r, Type => $type, @rest };
        return $r;
    };
    my $unknown = sub {
        my ($why) = @_;
        $r = { %$r, Type => qw(Unknown), Why => $why };
        return $r;
    };

    if (@p == 1) {
        my $d = $r->{Parents}[0]{Differs};
        if ($d == D_PAT_ADD) {
            return $classify->(qw(AddPatches));
        } elsif ($d & (D_PAT_ADD|D_PAT_OTH)) {
            return $unknown->("edits debian/patches");
        } elsif ($d == D_DEB) {
            my ($ty,$dummy) = git_cat_file "$ph[0]:debian";
            if ($ty eq 'tree') {
                return $classify->(qw(Packaging));
            } elsif ($ty eq 'missing') {
                return $classify->(qw(BreakwaterStart));
            } else {
                return $unknown->("parent's debian is not a directory");
            }
        } elsif ($d == D_UPS) {
            return $classify->(qw(Upstream));
        } elsif ($d == (D_DEB|D_UPS)) {
            return $classify->(qw(Mixed));
        } elsif ($d == 0) {
            return $unknown->("no changes");
        } else {
            confess "internal error $objid ?";
        }
    }
    if (!@p) {
        return $unknown->("origin commit");
    }

    my @identical = grep { !$_->{Differs} } @p;
    if (@p == 2 && @identical == 1) {
        my @overwritten = grep { $_->{Differs} } @p;
        confess "internal error $objid ?" unless @overwritten==1;
        return $classify->(qw(Pseudomerge),
                           Overwritten => [ $overwritten[0] ],
                           Contributor => $identical[0]);
    }
    if (@p == 2 && @identical == 2) {
        # Both parents have our tree.  Treat the parent with the
        # older committer date as the one overwritten.
        my $committer_time = sub {
            my ($parent) = @_;
            my ($ph,$pm) = get_commit $parent->{CommitId};
            $ph =~ m/^committer .* (\d+) [-+]\d+$/m
                or die "$parent->{CommitId} ?";
            return $1;
        };
        my @bytime =
            map { $_->[1] }
            sort { $a->[0] <=> $b->[0] }
            map { [ $committer_time->($_), $_ ] } @p;
        return $classify->(qw(Pseudomerge),
                           SubType => qw(Ambiguous),
                           Overwritten => [ $bytime[0] ],
                           Contributor => $bytime[1]);
    }
    foreach my $p (@p) {
        my ($p_h, $p_m) = get_commit $p->{CommitId};
        $p->{IsOrigin} = $p_h !~ m/^parent \w+$/m;
        ($p->{IsDgitImport},) = $p_m =~ m/^\[dgit import ([0-9a-z]+) .*\]$/m;
    }
    my @orig_ps = grep { ($_->{IsDgitImport}//'X') eq 'orig' } @p;
    my $m2 = $m;
    if (!(grep { !$_->{IsOrigin} } @p) and
        (@orig_ps >= @p - 1) and
        $m2 =~ s{^\[(dgit import unpatched .*)\]$}{[was: $1]}m) {
        $r->{NewMsg} = $m2;
        return $classify->(qw(DgitImportUnpatched),
                           OrigParents => \@orig_ps);
    }

    my ($stype, $series) = git_cat_file "$t:debian/patches/series";
    my $haspatches = $stype ne 'missing' && $series =~ m/^\s*[^#\n\t ]/m;

    # How to decide about l/r ordering of breakwater merges ?  git
    # --topo-order prefers to expand 2nd parent first.  There's
    # already an easy rune to look for debian/ history anyway (git log
    # debian/) so debian breakwater branch should be 1st parent; that
    # way also there's also an easy rune to look for the upstream
    # patches (--topo-order).
    if (@p == 2 &&
        !$haspatches &&
        !$p[0]{IsOrigin} && # breakwater merge never starts with an origin
        !($p[0]{Differs} & ~D_DEB) &&
        !($p[1]{Differs} & ~D_UPS)) {
        return $classify->(qw(BreakwaterUpstreamMerge),
                           OrigParents => [ $p[1] ]);
    }

    # xxx multi-.orig upstreams

    return $unknown->("complex merge");
}

# walk $input [, $nogenerate [, $report]]
#   Walks history backwards from commit $input, classifying each
#   commit, until it finds somewhere to start building from.  Unless
#   $nogenerate, it then rebuilds the history in laundered form.
#   If $report is given it is a filehandle to which one line per
#   commit is printed.
sub walk ($;$$);
sub walk ($;$$) {
    my ($input,
        $nogenerate,$report) = @_;
    # => ($tip, $breakwater_tip)
    # (or nothing, if $nogenerate)

    # go through commits backwards
    # we generate two lists of commits to apply:
    # breakwater branch and upstream patches
    my (@brw_cl, @upp_cl, @processed);
    my %found;
    my $upp_limit;
    my @pseudomerges;

    my $cl;
    # Returns a (Msg => ...) pair: the current commit's message with
    # a [git-debrebase ...] annotation appended.
    my $xmsg = sub {
        my ($appendinfo) = @_;
        my $ms = $cl->{Msg};
        chomp $ms;
        $ms .= "\n\n[git-debrebase $appendinfo]\n";
        return (Msg => $ms);
    };
    # Queues a marker: everything built after it gets rewritten.
    my $rewrite_from_here = sub {
        my $sp_cl = { SpecialMethod => 'StartRewrite' };
        push @brw_cl, $sp_cl;
        push @processed, $sp_cl;
    };
    my $cur = $input;

    # Report output: $prdelim holds the newline owed for the line
    # currently being printed, so that later code can append to it.
    my $prdelim = "";
    my $prprdelim = sub { print $report $prdelim if $report; $prdelim=""; };

    my $prline = sub {
        return unless $report;
        print $report $prdelim, @_;
        $prdelim = "\n";
    };

    my $bomb = sub { # usage: return $bomb->();
        print $report " Unprocessable" if $report;
        $prprdelim->();
        if ($nogenerate) {
            return (undef,undef);
        }
        die "commit $cur: Cannot cope with this commit";
    };

    my $build;
    my $breakwater;

    # Records where to build from and leaves the for(;;) loop below.
    # The `last' deliberately exits a loop in our caller's frame.
    my $build_start = sub {
        my ($msg, $parent) = @_;
        $prline->(" $msg");
        $build = $parent;
        no warnings qw(exiting); last;
    };

    for (;;) {
        $cl = classify $cur;
        my $ty = $cl->{Type};
        my $st = $cl->{SubType};
        $prline->("$cl->{CommitId} $cl->{Type}");
        $found{$ty. ( defined($st) ? "-$st" : '' )}++;
        push @processed, $cl;
        my $p0 = @{ $cl->{Parents} }==1 ? $cl->{Parents}[0]{CommitId} : undef;
        if ($ty eq 'AddPatches') {
            $cur = $p0;
            $rewrite_from_here->();
            next;
        } elsif ($ty eq 'Packaging') {
            push @brw_cl, $cl;
            $cur = $p0;
            next;
        } elsif ($ty eq 'BreakwaterStart') {
            $build_start->('FirstPackaging', $cur);
        } elsif ($ty eq 'Upstream') {
            push @upp_cl, $cl;
            $cur = $p0;
            next;
        } elsif ($ty eq 'Mixed') {
            # Queue one copy of the commit on each branch, each with
            # an annotated message.
            my $queue = sub {
                my ($q, $wh) = @_;
                my $cls = { %$cl, $xmsg->("split mixed commit: $wh part") };
                push @$q, $cls;
            };
            $queue->(\@brw_cl, "debian");
            $queue->(\@upp_cl, "upstream");
            $rewrite_from_here->();
            $cur = $p0;
            next;
        } elsif ($ty eq 'Pseudomerge') {
            my $contrib = $cl->{Contributor}{CommitId};
            print $report " Contributor=$contrib" if $report;
            push @pseudomerges, $cl;
            $rewrite_from_here->();
            $cur = $contrib;
            next;
        } elsif ($ty eq 'BreakwaterUpstreamMerge') {
            $build_start->("PreviousBreakwater", $cur);
        } elsif ($ty eq 'DgitImportUnpatched') {
            my $pm = $pseudomerges[-1];
            if (defined $pm) {
                # To an extent, this is heuristic.  Imports don't have
                # a useful history of the debian/ branch.  We assume
                # that the first pseudomerge after an import has a
                # useful history of debian/, and ignore the histories
                # from later pseudomerges.  Often the first pseudomerge
                # will be the dgit import of the upload to the actual
                # suite intended by the non-dgit NMUer, and later
                # pseudomerges may represent in-archive copies.
                my $ovwrs = $pm->{Overwritten};
                printf $report " PM=%s \@Overwr:%d",
                    $pm->{CommitId}, (scalar @$ovwrs)
                    if $report;
                if (@$ovwrs != 1) {
                    return $bomb->();
                }
                my $ovwr = $ovwrs->[0]{CommitId};
                printf $report " Overwr=%s", $ovwr if $report;
                # This import has a tree which is just like a
                # breakwater tree, but it has the wrong history.  It
                # ought to have the previous breakwater (which the
                # pseudomerge overwrote) as an ancestor.  That will
                # make the history of the debian/ files correct.  As
                # for the upstream version: either it's the same as
                # was overwritten (ie, same as the previous
                # breakwater), in which case that history is precisely
                # right; or, otherwise, it was a non-gitish upload of a
                # new upstream version.  We can tell these apart by
                # looking at the tree of the supposed upstream.
                push @brw_cl, {
                    %$cl,
                    SpecialMethod => 'DgitImportDebianUpdate',
                    $xmsg->("convert dgit import: debian changes")
                };
                my $differs = (get_differs $ovwr, $cl->{Tree});
                printf $report " Differs=%#x", $differs if $report;
                if ($differs & D_UPS) {
                    printf $report " D_UPS" if $report;
                    # This will also trigger if a non-dgit git-based NMU
                    # deleted .gitignore (which is a thing that some of
                    # the existing git tools do if the user doesn't
                    # somehow tell them not to).  Ah well.
                    push @brw_cl, {
                        %$cl,
                        SpecialMethod => 'DgitImportUpstreamUpdate',
                        $xmsg->("convert dgit import: upstream changes")
                    };
                }
                $prline->(" Import");
                $rewrite_from_here->();
                $upp_limit //= $#upp_cl; # further, deeper, patches discarded
                $cur = $ovwr;
                next;
            } else {
                # Everything is from this import.  This kind of import
                # is already in valid breakwater format, with the
                # patches as commits.
                printf $report " NoPM" if $report;
                # last thing we processed will have been the first patch,
                # if there is one; which is fine, so no need to rewrite
                # on account of this import
                $build_start->("ImportOrigin", $cur);
            }
            die "$ty ?"; # not reached: both branches leave the loop body
        } else {
            return $bomb->();
        }
    }
    $prprdelim->();
    return if $nogenerate;

    # Now we build it back up again

    workarea_fresh();

    my $rewriting = 0;

    my $rm_tree_cached = sub {
        my ($subdir) = @_;
        runcmd @git, qw(rm --quiet -rf --cached), $subdir;
    };
    # Replaces debian/ in the index with that of $treeish.
    my $read_tree_debian = sub {
        my ($treeish) = @_;
        $rm_tree_cached->(qw(debian));
        runcmd @git, qw(read-tree --prefix=debian/), "$treeish:debian";
    };
    # Reads all of $treeish into the index, then puts back the
    # debian/ from the commit built so far.
    my $read_tree_upstream = sub {
        my ($treeish) = @_;
        runcmd @git, qw(read-tree), $treeish;
        $read_tree_debian->($build);
    };

    my $committer_authline = calculate_committer_authline();

    in_workarea sub {
        mkdir $rd or $!==EEXIST or die $!;
        my $current_method;
        # Plain strings in this list select the method for the hash
        # entries which follow them.
        foreach my $cl (qw(Debian), (reverse @brw_cl),
                        { SpecialMethod => 'RecordBreakwaterTip' },
                        qw(Upstream), (reverse @upp_cl)) {
            if (!ref $cl) {
                $current_method = $cl;
                next;
            }
            my $method = $cl->{SpecialMethod} // $current_method;
            my @parents = ($build);
            my $cltree = $cl->{CommitId};
            if ($method eq 'Debian') {
                $read_tree_debian->($cltree);
            } elsif ($method eq 'Upstream') {
                $read_tree_upstream->($cltree);
            } elsif ($method eq 'StartRewrite') {
                $rewriting = 1;
                next;
            } elsif ($method eq 'RecordBreakwaterTip') {
                $breakwater = $build;
                next;
            } elsif ($method eq 'DgitImportDebianUpdate') {
                $read_tree_debian->($cltree);
                $rm_tree_cached->(qw(debian/patches));
            } elsif ($method eq 'DgitImportUpstreamUpdate') {
                $read_tree_upstream->($cltree);
                push @parents, map { $_->{CommitId} } @{ $cl->{OrigParents} };
            } else {
                confess "$method ?";
            }
            $rewriting ||= $cl ne pop @processed;
            my $newtree = cmdoutput @git, qw(write-tree);
            my $ch = $cl->{Hdr};
            $ch =~ s{^tree .*}{tree $newtree}m or confess "$ch ?";
            # Drop every old parent line, then insert ours before the
            # author line.  The join matters: s///e evaluates its
            # replacement in scalar context, where a bare map would
            # yield only a count.
            $ch =~ s{^parent .*\n}{}mg;
            $ch =~ s{(?=^author)}{
                join '', map { "parent $_\n" } @parents
            }me or confess "$ch ?";
            if ($rewriting) {
                $ch =~ s{^committer .*$}{$committer_authline}m
                    or confess "$ch ?";
            }
            my $cf = "$rd/m$rewriting";
            open my $cd_fh, ">", $cf or die $!;
            print $cd_fh $ch, "\n", $cl->{Msg} or die $!;
            close $cd_fh or die $!;
            my @cmd = (@git, qw(hash-object));
            push @cmd, qw(-w) if $rewriting;
            push @cmd, qw(-t commit), $cf;
            my $newcommit = cmdoutput @cmd;
            # Until we start rewriting, we must reproduce the
            # original commits exactly.
            confess "$ch ?" unless $rewriting or $newcommit eq $cl->{CommitId};
            $build = $newcommit;
        }
    };

    runcmd @git, qw(diff-tree --quiet), $input, $build;

    return ($build, $breakwater);
}

# get_head
#   Returns what git_rev_parse gives for HEAD.
sub get_head () { return git_rev_parse qw(HEAD); }

# update_head $old, $new, $mrest
#   Moves HEAD from $old to $new with reflog message
#   "git-debrebase $mrest".  $old is passed to git update-ref as the
#   expected old value.
sub update_head ($$$) {
    my ($old, $new, $mrest) = @_;
    runcmd @git, qw(update-ref -m), "git-debrebase $mrest", 'HEAD', $new, $old;
}

# git-debrebase launder
#   Launders the history of HEAD and prints the breakwater tip.
sub cmd_launder () {
    badusage "no arguments to launder allowed" if @ARGV;
    my $old = get_head();
    my ($tip,$breakwater) = walk $old;
    update_head $old, $tip, 'launder';
    # no tree changes except debian/patches
    runcmd @git, qw(rm --quiet --ignore-unmatch -rf debian/patches);
    printf "# breakwater tip\n%s\n", $breakwater;
}

# git-debrebase analyse [COMMITTISH]
#   Prints the classification of each commit walked, starting from
#   COMMITTISH (default HEAD), without generating anything.
sub cmd_analyse () {
    die if ($ARGV[0]//'') =~ m/^-/;
    badusage "too many arguments to analyse" if @ARGV>1;
    my ($old) = @ARGV;
    if (defined $old) {
        $old = git_rev_parse $old;
    } else {
        $old = get_head();
    }
    my ($dummy,$breakwater) = walk $old, 1,*STDOUT;
    # walk yields no breakwater tip when it is not generating
    print "$breakwater BREAKWATER\n" if defined $breakwater;
    STDOUT->error and die $!;
}

my $toplevel = cmdoutput @git, qw(rev-parse --show-toplevel);
chdir $toplevel or die "chdir $toplevel: $!";

# Dispatch: sub-operation foo-bar is implemented by cmd_foo_bar.
my $cmd = shift @ARGV;
defined $cmd or badusage "no git-debrebase sub-operation specified";
my $cmdfn = $cmd;
$cmdfn =~ y/-/_/;
$cmdfn = ${*::}{"cmd_$cmdfn"};

$cmdfn or badusage "unknown git-debrebase sub-operation $cmd";
$cmdfn->();