From: Ian Jackson Date: Sun, 2 Oct 2016 19:55:57 +0000 (+0100) Subject: New import: Entirely new import algorithm X-Git-Tag: archive/debian/2.0~93 X-Git-Url: http://www.chiark.greenend.org.uk/ucgi/~ianmdlvl/git?p=dgit.git;a=commitdiff_plain;h=0eac80b3d8464d1b476cb27bb5d4f286d56dd422 New import: Entirely new import algorithm See README.dsc-import (which needs some updating). Signed-off-by: Ian Jackson --- diff --git a/dgit b/dgit index 15a94c72..b4b6d9cd 100755 --- a/dgit +++ b/dgit @@ -1572,35 +1572,258 @@ sub generate_commits_from_dsc () { } } + # We unpack and record the orig tarballs first, so that we only + # need disk space for one private copy of the unpacked source. + # But we can't make them into commits until we have the metadata + # from the debian/changelog, so we record the tree objects now and + # make them into commits later. + my @tartrees; + my $upstreamv = $dsc->{version}; + $upstreamv =~ s/-[^-]+$//; + my $orig_f_base = srcfn $upstreamv, ''; + + foreach my $fi (@dfi) { + # We actually import, and record as a commit, every tarball + # (unless there is only one file, in which case there seems + # little point. + + my $f = $fi->{Filename}; + printdebug "import considering $f "; + (printdebug "only one dfi\n"), next if @dfi == 1; + (printdebug "not tar\n"), next unless $f =~ m/\.tar(\.\w+)?$/; + my $compr_ext = $1; + + my ($orig_f_part) = + $f =~ m/^\Q$orig_f_base\E\.([^._]+)?\.tar(?:\.\w+)?$/; + + printdebug "Y ", (join ' ', map { $_//"(none)" } + $compr_ext, $orig_f_part + ), "\n"; + + my $input = new IO::File $f, '<' or die "$f $!"; + my $compr_pid; + my @compr_cmd; + + if (defined $compr_ext) { + my $cname = + Dpkg::Compression::compression_guess_from_filename $f; + fail "Dpkg::Compression cannot handle file $f in source package" + if defined $compr_ext && !defined $cname; + my $compr_proc = + new Dpkg::Compression::Process compression => $cname; + my @compr_cmd = $compr_proc->get_uncompress_cmdline(); + my $compr_fh = new IO::Handle; + my $compr_pid = open $compr_fh, "-|" // die $!; + if (!$compr_pid) { + open STDIN, "<&", $input or die $!; + exec @compr_cmd; + die "dgit (child): exec $compr_cmd[0]: $!\n"; + } + $input = $compr_fh; + } + + rmtree "../unpack-tar"; + mkdir "../unpack-tar" or die $!; + my @tarcmd = qw(tar -x -f - + --no-same-owner --no-same-permissions + --no-acls --no-xattrs --no-selinux); + my $tar_pid = fork // die $!; + if (!$tar_pid) { + chdir "../unpack-tar" or die $!; + open STDIN, "<&", $input or die $!; + exec @tarcmd; + die "dgit (child): exec $tarcmd[0]: $!"; + } + $!=0; (waitpid $tar_pid, 0) == $tar_pid or die $!; + !$? or failedcmd @tarcmd; + + close $input or + (@compr_cmd ? failedcmd @compr_cmd + : die $!); + # finally, we have the results in "tarball", but maybe + # with the wrong permissions + + runcmd qw(chmod -R +rwX ../unpack-tar); + changedir "../unpack-tar"; + my ($tree) = mktree_in_ud_from_only_subdir(1); + changedir "../../unpack"; + rmtree "../unpack-tar"; + + my $ent = [ $f, $tree ]; + push @tartrees, { + Orig => !!$orig_f_part, + Sort => (!$orig_f_part ? 2 : + $orig_f_part =~ m/-/g ? 1 : + 0), + F => $f, + Tree => $tree, + }; + } + + @tartrees = sort { + # put any without "_" first (spec is not clear whether files + # are always in the usual order). Tarballs without "_" are + # the main orig or the debian tarball. + $a->{Sort} <=> $b->{Sort} or + $a->{F} cmp $b->{F} + } @tartrees; + + my $any_orig = grep { $_->{Orig} } @tartrees; + my $dscfn = "$package.dsc"; + my $treeimporthow = 'package'; + open D, ">", $dscfn or die "$dscfn: $!"; print D $dscdata or die "$dscfn: $!"; close D or die "$dscfn: $!"; my @cmd = qw(dpkg-source); push @cmd, '--no-check' if $dsc_checked; + if (madformat $dsc->{format}) { + push @cmd, '--skip-patches'; + $treeimporthow = 'unpatched'; + } push @cmd, qw(-x --), $dscfn; runcmd @cmd; my ($tree,$dir) = mktree_in_ud_from_only_subdir(); - check_for_vendor_patches() if madformat($dsc->{format}); - runcmd qw(sh -ec), 'dpkg-parsechangelog >../changelog.tmp'; - my $clogp = parsecontrol('../changelog.tmp',"commit's changelog"); + if (madformat $dsc->{format}) { + check_for_vendor_patches(); + } + + my $dappliedtree; + if (madformat $dsc->{format}) { + my @pcmd = qw(dpkg-source --before-build .); + runcmd shell_cmd 'exec >/dev/null', @pcmd; + rmtree '.pc'; + runcmd @git, qw(add -Af); + $dappliedtree = git_write_tree(); + } + + my @clogcmd = qw(dpkg-parsechangelog --format rfc822 --all); + debugcmd "|",@clogcmd; + open CLOGS, "-|", @clogcmd or die $!; + + my $clogp; + my $r1clogp; + + for (;;) { + my $stanzatext = do { local $/=""; ; }; + last if !defined $stanzatext; + + my $desc = "package changelog, entry no.$."; + open my $stanzafh, "<", \$stanzatext or die; + my $thisstanza = parsecontrolfh $stanzafh, $desc, 1; + $clogp //= $thisstanza; + + last if !$any_orig; # we don't need $r1clogp + + # We look for the first (most recent) changelog entry whose + # version number is lower than the upstream version of this + # package. Then the last (least recent) previous changelog + # entry is treated as the one which introduced this upstream + # version and used for the synthetic commits for the upstream + # tarballs. + + # One might think that a more sophisticated algorithm would be + # necessary. But: we do not want to scan the whole changelog + # file. Stopping when we see an earlier version, which + # necessarily then is an earlier upstream version, is the only + # realistic way to do that. Then, either the earliest + # changelog entry we have seen so far is indeed the earliest + # upload of this upstream version; or there are only changelog + # entries relating to later upstream versions (which is not + # possible unless the changelog and .dsc disagree about the + # version). Then it remains to choose between the physically + # last entry in the file, and the one with the lowest version + # number. If these are not the same, we guess that the + # versions were created in a non-monotic order rather than + # that the changelog entries have been misordered. + + last if version_compare($thisstanza->{version}, $upstreamv) < 0; + $r1clogp = $thisstanza; + } + die $! if CLOGS->error; + close CLOGS or $?==(SIGPIPE<<8) or failedcmd @clogcmd; + + $clogp or fail "package changelog has no entries!"; + my $authline = clogp_authline $clogp; my $changes = getfield $clogp, 'Changes'; + my $cversion = getfield $clogp, 'Version'; + + if (@tartrees) { + $r1clogp //= $clogp; # maybe there's only one entry; + my $r1authline = clogp_authline $r1clogp; + # Strictly, r1authline might now be wrong if it's going to be + # unused because !$any_orig. Whatever. + + foreach my $tt (@tartrees) { + $tt->{Commit} = make_commit_text($tt->{Orig} ? <{Tree} +author $r1authline +committer $r1authline + +Import $tt->{F} + +[dgit import orig $tt->{F}] +END_O +tree $tt->{Tree} +author $authline +committer $authline + +Import $tt->{F} + +[dgit import tarball $package $cversion $tt->{F}] +END_T + } + } + open C, ">../commit.tmp" or die $!; print C <{Commit} +END + print C <{format}) { + # regularise the state of the working tree so that + # the checkout of $rawimport_hash works nicely. + my $dappliedcommit = make_commit_text(</dev/null', @gbp, qw(pq import); + my $gapplied = git_rev_parse('HEAD'); + my $gappliedtree = cmdoutput @git, qw(rev-parse HEAD:); + $gappliedtree eq $dappliedtree or + fail <