1 """A Python class hierarchy wrapping a git repository and its
5 from datetime import datetime, timedelta, tzinfo
7 from stgit import exception, run, utils
8 from stgit.config import config
10 class Immutable(object):
11 """I{Immutable} objects cannot be modified once created. Any
12 modification methods will return a new object, leaving the
13 original object as it was.
15 The reason for this is that we want to be able to represent git
16 objects, which are immutable, and want to be able to create new
17 git objects that are just slight modifications of other git
18 objects. (Such as, for example, modifying the commit message of a
19 commit object while leaving the rest of it intact. This involves
20 creating a whole new commit object that's exactly like the old one
21 except for the commit message.)
23 The L{Immutable} class doesn't actually enforce immutability --
24 that is up to the individual immutable subclasses. It just serves
27 class RepositoryException(exception.StgException):
28 """Base class for all exceptions due to failed L{Repository}
class BranchException(exception.StgException):
    """Raised when an operation on a L{Branch} fails."""
class DateException(exception.StgException):
    """Raised when a date+time string cannot be parsed.

    @param string: The text that failed to parse
    @param type: Short description of what the text was expected to
        be; it is interpolated into the error message"""

    def __init__(self, string, type):
        message = '"%s" is not a valid %s' % (string, type)
        exception.StgException.__init__(self, message)
40 class DetachedHeadException(RepositoryException):
41 """Exception raised when HEAD is detached (that is, there is no
44 RepositoryException.__init__(self, 'Not on any branch')
    """Utility class that defines C{__repr__} in terms of C{__str__}."""
class NoValue(object):
    """Placeholder used as the default for optional arguments. It is
    guaranteed to be distinct from any real argument value, so callers
    can tell "argument not given" apart from any value they pass."""
56 def make_defaults(defaults):
57 def d(val, attr, default_fun = lambda: None):
60 elif defaults != NoValue:
61 return getattr(defaults, attr)
66 class TimeZone(tzinfo, Repr):
67 """A simple time zone class for static offsets from UTC. (We have to
68 define our own since Python's standard library doesn't define any
69 time zone classes.)"""
70 def __init__(self, tzstring):
71 m = re.match(r'^([+-])(\d{2}):?(\d{2})$', tzstring)
73 raise DateException(tzstring, 'time zone')
74 sign = int(m.group(1) + '1')
76 self.__offset = timedelta(hours = sign*int(m.group(2)),
77 minutes = sign*int(m.group(3)))
79 raise DateException(tzstring, 'time zone')
80 self.__name = tzstring
81 def utcoffset(self, dt):
90 class Date(Immutable, Repr):
91 """Represents a timestamp used in git commits."""
92 def __init__(self, datestring):
93 # Try git-formatted date.
94 m = re.match(r'^(\d+)\s+([+-]\d\d:?\d\d)$', datestring)
97 self.__time = datetime.fromtimestamp(int(m.group(1)),
100 raise DateException(datestring, 'date')
103 # Try iso-formatted date.
104 m = re.match(r'^(\d{4})-(\d{2})-(\d{2})\s+(\d{2}):(\d{2}):(\d{2})\s+'
105 + r'([+-]\d\d:?\d\d)$', datestring)
108 self.__time = datetime(
109 *[int(m.group(i + 1)) for i in xrange(6)],
110 **{'tzinfo': TimeZone(m.group(7))})
112 raise DateException(datestring, 'date')
115 raise DateException(datestring, 'date')
117 return self.isoformat()
119 """Human-friendly ISO 8601 format."""
120 return '%s %s' % (self.__time.replace(tzinfo = None).isoformat(' '),
123 def maybe(cls, datestring):
124 """Return a new object initialized with the argument if it contains a
125 value (otherwise, just return the argument)."""
126 if datestring in [None, NoValue]:
128 return cls(datestring)
130 class Person(Immutable, Repr):
131 """Represents an author or committer in a git commit object. Contains
132 name, email and timestamp."""
def __init__(self, name=NoValue, email=NoValue,
             date=NoValue, defaults=NoValue):
    """Build a person from explicit fields and/or a template object.

    Each of C{name}, C{email} and C{date} is passed through the helper
    returned by C{make_defaults(defaults)}, which can fall back to the
    same-named attribute of C{defaults}.

    @param date: a L{Date}, C{None} or L{NoValue} once resolved"""
    pick = make_defaults(defaults)
    self.__name = pick(name, 'name')
    self.__email = pick(email, 'email')
    self.__date = pick(date, 'date')
    # Dates must already be parsed; raw strings are not accepted here.
    assert (isinstance(self.__date, Date)
            or self.__date in [None, NoValue])
140 name = property(lambda self: self.__name)
141 email = property(lambda self: self.__email)
142 date = property(lambda self: self.__date)
def set_name(self, name):
    """Return a new object of the same type whose name is C{name};
    every other field is taken from this object."""
    cls = type(self)
    return cls(name=name, defaults=self)
def set_email(self, email):
    """Return a new object of the same type whose email is C{email};
    every other field is taken from this object."""
    cls = type(self)
    return cls(email=email, defaults=self)
def set_date(self, date):
    """Return a new object of the same type whose date is C{date};
    every other field is taken from this object."""
    cls = type(self)
    return cls(date=date, defaults=self)
150 return '%s <%s> %s' % (self.name, self.email, self.date)
153 m = re.match(r'^([^<]*)<([^>]*)>\s+(\d+\s+[+-]\d{4})$', s)
155 name = m.group(1).strip()
157 date = Date(m.group(3))
158 return cls(name, email, date)
161 if not hasattr(cls, '__user'):
162 cls.__user = cls(name = config.get('user.name'),
163 email = config.get('user.email'))
167 if not hasattr(cls, '__author'):
169 name = os.environ.get('GIT_AUTHOR_NAME', NoValue),
170 email = os.environ.get('GIT_AUTHOR_EMAIL', NoValue),
171 date = Date.maybe(os.environ.get('GIT_AUTHOR_DATE', NoValue)),
172 defaults = cls.user())
176 if not hasattr(cls, '__committer'):
177 cls.__committer = cls(
178 name = os.environ.get('GIT_COMMITTER_NAME', NoValue),
179 email = os.environ.get('GIT_COMMITTER_EMAIL', NoValue),
181 os.environ.get('GIT_COMMITTER_DATE', NoValue)),
182 defaults = cls.user())
183 return cls.__committer
185 class Tree(Immutable, Repr):
186 """Represents a git tree object."""
187 def __init__(self, sha1):
189 sha1 = property(lambda self: self.__sha1)
191 return 'Tree<%s>' % self.sha1
193 class CommitData(Immutable, Repr):
194 """Represents the actual data contents of a git commit object."""
def __init__(self, tree=NoValue, parents=NoValue, author=NoValue,
             committer=NoValue, message=NoValue, defaults=NoValue):
    """Build commit data from explicit fields and/or a template object.

    Every field is passed through the helper returned by
    C{make_defaults(defaults)}, which can fall back to the same-named
    attribute of C{defaults}. For C{author} and C{committer} the
    functions L{Person.author} and L{Person.committer} are handed to
    that helper as the last-resort default."""
    resolve = make_defaults(defaults)
    self.__tree = resolve(tree, 'tree')
    self.__parents = resolve(parents, 'parents')
    self.__author = resolve(author, 'author', Person.author)
    self.__committer = resolve(committer, 'committer', Person.committer)
    self.__message = resolve(message, 'message')
203 tree = property(lambda self: self.__tree)
204 parents = property(lambda self: self.__parents)
207 assert len(self.__parents) == 1
208 return self.__parents[0]
209 author = property(lambda self: self.__author)
210 committer = property(lambda self: self.__committer)
211 message = property(lambda self: self.__message)
def set_tree(self, tree):
    """Return a new object of the same type with C{tree} as its tree;
    every other field is taken from this object."""
    cls = type(self)
    return cls(tree=tree, defaults=self)
def set_parents(self, parents):
    """Return a new object of the same type with C{parents} as its
    parents; every other field is taken from this object."""
    cls = type(self)
    return cls(parents=parents, defaults=self)
216 def add_parent(self, parent):
217 return type(self)(parents = list(self.parents or []) + [parent],
def set_parent(self, parent):
    """Return a new object whose only parent is C{parent}, by
    delegating to C{set_parents} with a one-element list."""
    sole_parent = [parent]
    return self.set_parents(sole_parent)
def set_author(self, author):
    """Return a new object of the same type with C{author} as its
    author; every other field is taken from this object."""
    cls = type(self)
    return cls(author=author, defaults=self)
def set_committer(self, committer):
    """Return a new object of the same type with C{committer} as its
    committer; every other field is taken from this object."""
    cls = type(self)
    return cls(committer=committer, defaults=self)
def set_message(self, message):
    """Return a new object of the same type with C{message} as its
    message; every other field is taken from this object."""
    cls = type(self)
    return cls(message=message, defaults=self)
def is_nochange(self):
    """Tell whether this commit has exactly one parent and the same
    tree as that parent."""
    if len(self.parents) != 1:
        return False
    parent_tree = self.parent.data.tree
    return self.tree == parent_tree
230 if self.tree == None:
233 tree = self.tree.sha1
234 if self.parents == None:
237 parents = [p.sha1 for p in self.parents]
238 return ('CommitData<tree: %s, parents: %s, author: %s,'
239 ' committer: %s, message: "%s">'
240 ) % (tree, parents, self.author, self.committer, self.message)
242 def parse(cls, repository, s):
243 cd = cls(parents = [])
244 lines = list(s.splitlines(True))
245 for i in xrange(len(lines)):
246 line = lines[i].strip()
248 return cd.set_message(''.join(lines[i+1:]))
249 key, value = line.split(None, 1)
251 cd = cd.set_tree(repository.get_tree(value))
252 elif key == 'parent':
253 cd = cd.add_parent(repository.get_commit(value))
254 elif key == 'author':
255 cd = cd.set_author(Person.parse(value))
256 elif key == 'committer':
257 cd = cd.set_committer(Person.parse(value))
262 class Commit(Immutable, Repr):
263 """Represents a git commit object. All the actual data contents of the
264 commit object is stored in the L{data} member, which is a
265 L{CommitData} object."""
266 def __init__(self, repository, sha1):
268 self.__repository = repository
270 sha1 = property(lambda self: self.__sha1)
273 if self.__data == None:
274 self.__data = CommitData.parse(
276 self.__repository.cat_object(self.sha1))
279 return 'Commit<sha1: %s, data: %s>' % (self.sha1, self.__data)
282 """Accessor for the refs stored in a git repository. Will
283 transparently cache the values of all refs."""
284 def __init__(self, repository):
285 self.__repository = repository
287 def __cache_refs(self):
288 """(Re-)Build the cache of all refs in the repository."""
290 for line in self.__repository.run(['git', 'show-ref']).output_lines():
291 m = re.match(r'^([0-9a-f]{40})\s+(\S+)$', line)
292 sha1, ref = m.groups()
293 self.__refs[ref] = sha1
295 """Get the Commit the given ref points to. Throws KeyError if ref
297 if self.__refs == None:
299 return self.__repository.get_commit(self.__refs[ref])
300 def exists(self, ref):
301 """Check if the given ref exists."""
308 def set(self, ref, commit, msg):
309 """Write the sha1 of the given Commit to the ref. The ref may or may
310 not already exist."""
311 if self.__refs == None:
313 old_sha1 = self.__refs.get(ref, '0'*40)
314 new_sha1 = commit.sha1
315 if old_sha1 != new_sha1:
316 self.__repository.run(['git', 'update-ref', '-m', msg,
317 ref, new_sha1, old_sha1]).no_output()
318 self.__refs[ref] = new_sha1
319 def delete(self, ref):
320 """Delete the given ref. Throws KeyError if ref doesn't exist."""
321 if self.__refs == None:
323 self.__repository.run(['git', 'update-ref',
324 '-d', ref, self.__refs[ref]]).no_output()
327 class ObjectCache(object):
328 """Cache for Python objects, for making sure that we create only one
329 Python object per git object. This reduces memory consumption and
330 makes object comparison very cheap."""
331 def __init__(self, create):
333 self.__create = create
def __getitem__(self, name):
    """Return the object cached under C{name}. If there is none yet,
    it is first built with the creation function and stored."""
    cache = self.__objects
    if name not in cache:
        cache[name] = self.__create(name)
    return cache[name]
def __contains__(self, name):
    """Tell whether an object is already cached under C{name}. Nothing
    is created by this check."""
    cache = self.__objects
    return name in cache
def __setitem__(self, name, val):
    """Store C{val} under C{name}. The name must not be in the cache
    already; this is checked with an assertion."""
    cache = self.__objects
    assert not name in cache
    cache[name] = val
class RunWithEnv(object):
    """Mixin for objects that run commands in the environment given by
    their C{env} attribute."""

    def run(self, args, env=None):
        """Run the given command with an environment given by self.env.

        @type args: list of strings
        @param args: Command and argument vector
        @type env: dict
        @param env: Extra environment, combined with C{self.env} via
            C{utils.add_dict}; omitted or C{None} means no extras
        @return: the C{run.Run} result with the environment applied"""
        # A fresh dict per call avoids sharing one mutable default
        # object between all invocations.
        if env is None:
            env = {}
        return run.Run(*args).env(utils.add_dict(self.env, env))
class RunWithEnvCwd(RunWithEnv):
    """Mixin for objects that run commands in the environment given by
    their C{env} attribute and in the directory given by C{cwd}."""

    def run(self, args, env=None):
        """Run the given command with an environment given by self.env, and
        current working directory given by self.cwd.

        @type args: list of strings
        @param args: Command and argument vector
        @type env: dict
        @param env: Extra environment; omitted or C{None} means no
            extras
        @return: the result of L{RunWithEnv.run} with C{cwd} applied"""
        # A fresh dict per call avoids sharing one mutable default
        # object between all invocations.
        if env is None:
            env = {}
        return RunWithEnv.run(self, args, env).cwd(self.cwd)
365 class Repository(RunWithEnv):
366 """Represents a git repository."""
def __init__(self, directory):
    """Set up the repository object for the git directory
    C{directory}.

    Creates the L{Refs} accessor and the L{ObjectCache}s for trees and
    commits. The default index, worktree and index+worktree slots
    start out as C{None}."""
    def make_tree(sha1):
        return Tree(sha1)

    def make_commit(sha1):
        return Commit(self, sha1)

    self.__git_dir = directory
    self.__refs = Refs(self)
    self.__trees = ObjectCache(make_tree)
    self.__commits = ObjectCache(make_commit)
    self.__default_index = None
    self.__default_worktree = None
    self.__default_iw = None
375 env = property(lambda self: { 'GIT_DIR': self.__git_dir })
378 """Return the default repository."""
380 return cls(run.Run('git', 'rev-parse', '--git-dir'
382 except run.RunException:
383 raise RepositoryException('Cannot find git repository')
def current_branch_name(self):
    """Return the name of the current branch, computed by passing
    C{self.head_ref} and the prefix C{refs/heads/} to
    C{utils.strip_leading}."""
    head = self.head_ref
    return utils.strip_leading('refs/heads/', head)
def default_index(self):
    """An L{Index} object representing the default index file for the
    repository.

    The file is the one named by the C{GIT_INDEX_FILE} environment
    variable if that is set and non-empty, otherwise C{index} inside
    the git directory. The object is created on first use and reused
    afterwards."""
    # Identity test: "not created yet" must not depend on how the
    # cached object implements equality.
    if self.__default_index is None:
        index_file = (os.environ.get('GIT_INDEX_FILE', None)
                      or os.path.join(self.__git_dir, 'index'))
        self.__default_index = Index(self, index_file)
    return self.__default_index
def temp_index(self):
    """Return an L{Index} object representing a new temporary index
    file for the repository. The L{Index} is constructed with the git
    directory as its filename argument."""
    git_dir = self.__git_dir
    return Index(self, git_dir)
402 def default_worktree(self):
403 """A L{Worktree} object representing the default work tree."""
404 if self.__default_worktree == None:
405 path = os.environ.get('GIT_WORK_TREE', None)
407 o = run.Run('git', 'rev-parse', '--show-cdup').output_lines()
411 self.__default_worktree = Worktree(path)
412 return self.__default_worktree
def default_iw(self):
    """An L{IndexAndWorktree} object representing the default index and
    work tree for this repository.

    It is built from C{self.default_index} and C{self.default_worktree}
    on first use and reused afterwards."""
    # Identity test: "not created yet" must not depend on how the
    # cached object implements equality.
    if self.__default_iw is None:
        self.__default_iw = IndexAndWorktree(self.default_index,
                                             self.default_worktree)
    return self.__default_iw
421 directory = property(lambda self: self.__git_dir)
422 refs = property(lambda self: self.__refs)
def cat_object(self, sha1):
    """Return the raw output of C{git cat-file -p} for the object
    named C{sha1}."""
    command = ['git', 'cat-file', '-p', sha1]
    return self.run(command).raw_output()
425 def rev_parse(self, rev):
427 return self.get_commit(self.run(
428 ['git', 'rev-parse', '%s^{commit}' % rev]
430 except run.RunException:
431 raise RepositoryException('%s: No such revision' % rev)
def get_tree(self, sha1):
    """Look up C{sha1} in this repository's tree cache and return the
    result."""
    trees = self.__trees
    return trees[sha1]
def get_commit(self, sha1):
    """Look up C{sha1} in this repository's commit cache and return
    the result."""
    commits = self.__commits
    return commits[sha1]
436 def commit(self, commitdata):
437 c = ['git', 'commit-tree', commitdata.tree.sha1]
438 for p in commitdata.parents:
442 for p, v1 in ((commitdata.author, 'AUTHOR'),
443 (commitdata.committer, 'COMMITTER')):
445 for attr, v2 in (('name', 'NAME'), ('email', 'EMAIL'),
447 if getattr(p, attr) != None:
448 env['GIT_%s_%s' % (v1, v2)] = str(getattr(p, attr))
449 sha1 = self.run(c, env = env).raw_input(commitdata.message
451 return self.get_commit(sha1)
455 return self.run(['git', 'symbolic-ref', '-q', 'HEAD']
457 except run.RunException:
458 raise DetachedHeadException()
def set_head_ref(self, ref, msg):
    """Make the symbolic ref HEAD point at C{ref} using
    C{git symbolic-ref}, passing C{msg} as the C{-m} argument."""
    command = ['git', 'symbolic-ref', '-m', msg, 'HEAD', ref]
    self.run(command).no_output()
461 def simple_merge(self, base, ours, theirs):
462 """Given three L{Tree}s, tries to do an in-index merge with a
463 temporary index. Returns the result L{Tree}, or None if the
464 merge failed (due to conflicts)."""
465 assert isinstance(base, Tree)
466 assert isinstance(ours, Tree)
467 assert isinstance(theirs, Tree)
469 # Take care of the really trivial cases.
477 index = self.temp_index()
478 index.read_tree(ours)
481 index.apply_treediff(base, theirs, quiet = True)
482 return index.write_tree()
483 except MergeException:
487 def apply(self, tree, patch_text, quiet):
488 """Given a L{Tree} and a patch, will either return the new L{Tree}
489 that results when the patch is applied, or None if the patch
490 couldn't be applied."""
491 assert isinstance(tree, Tree)
494 index = self.temp_index()
496 index.read_tree(tree)
498 index.apply(patch_text, quiet)
499 return index.write_tree()
500 except MergeException:
def diff_tree(self, t1, t2, diff_opts):
    """Given two L{Tree}s C{t1} and C{t2}, return the patch that
    C{git diff-tree -p} produces for them, in that order.

    @type diff_opts: list of strings
    @param diff_opts: Extra diff options
    @return: Patch text"""
    for tree in (t1, t2):
        assert isinstance(tree, Tree)
    command = ['git', 'diff-tree', '-p']
    command.extend(diff_opts)
    command.extend([t1.sha1, t2.sha1])
    return self.run(command).raw_output()
class MergeException(exception.StgException):
    """Raised when a merge fails, whatever the reason."""
class MergeConflictException(MergeException):
    """Raised when a merge fails specifically because of conflicts."""
523 class Index(RunWithEnv):
524 """Represents a git index file."""
525 def __init__(self, repository, filename):
526 self.__repository = repository
527 if os.path.isdir(filename):
528 # Create a temp index in the given directory.
529 self.__filename = os.path.join(
530 filename, 'index.temp-%d-%x' % (os.getpid(), id(self)))
533 self.__filename = filename
534 env = property(lambda self: utils.add_dict(
535 self.__repository.env, { 'GIT_INDEX_FILE': self.__filename }))
def read_tree(self, tree):
    """Run C{git read-tree} on the sha1 of C{tree} through this
    object's C{run} method."""
    command = ['git', 'read-tree', tree.sha1]
    self.run(command).no_output()
538 def write_tree(self):
540 return self.__repository.get_tree(
541 self.run(['git', 'write-tree']).discard_stderr(
543 except run.RunException:
544 raise MergeException('Conflicting merge')
547 self.run(['git', 'update-index', '--refresh']).discard_output()
548 except run.RunException:
552 def apply(self, patch_text, quiet):
553 """In-index patch application, no worktree involved."""
555 r = self.run(['git', 'apply', '--cached']).raw_input(patch_text)
557 r = r.discard_stderr()
559 except run.RunException:
560 raise MergeException('Patch does not apply cleanly')
561 def apply_treediff(self, tree1, tree2, quiet):
562 """Apply the diff from C{tree1} to C{tree2} to the index."""
563 # Passing --full-index here is necessary to support binary
564 # files. It is also sufficient, since the repository already
565 # contains all involved objects; in other words, we don't have
567 self.apply(self.__repository.diff_tree(tree1, tree2, ['--full-index']),
570 if os.path.isfile(self.__filename):
571 os.remove(self.__filename)
573 """The set of conflicting paths."""
575 for line in self.run(['git', 'ls-files', '-z', '--unmerged']
576 ).raw_output().split('\0')[:-1]:
577 stat, path = line.split('\t', 1)
class Worktree(object):
    """A git worktree, that is, a checked-out file tree."""

    def __init__(self, directory):
        self.__directory = directory

    @property
    def env(self):
        """Environment for git commands using this worktree. The work
        tree is always given as C{.}, so it is relative to the
        directory the command runs in."""
        return {'GIT_WORK_TREE': '.'}

    @property
    def directory(self):
        """The directory this object was created with."""
        return self.__directory
class CheckoutException(exception.StgException):
    """Raised when checking out a tree fails."""
591 class IndexAndWorktree(RunWithEnvCwd):
592 """Represents a git index and a worktree. Anything that an index or
593 worktree can do on their own are handled by the L{Index} and
594 L{Worktree} classes; this class concerns itself with the
595 operations that require both."""
596 def __init__(self, index, worktree):
598 self.__worktree = worktree
599 index = property(lambda self: self.__index)
600 env = property(lambda self: utils.add_dict(self.__index.env,
601 self.__worktree.env))
602 cwd = property(lambda self: self.__worktree.directory)
603 def checkout(self, old_tree, new_tree):
604 # TODO: Optionally do a 3-way instead of doing nothing when we
605 # have a problem. Or maybe we should stash changes in a patch?
606 assert isinstance(old_tree, Tree)
607 assert isinstance(new_tree, Tree)
609 self.run(['git', 'read-tree', '-u', '-m',
610 '--exclude-per-directory=.gitignore',
611 old_tree.sha1, new_tree.sha1]
613 except run.RunException:
614 raise CheckoutException('Index/workdir dirty')
615 def merge(self, base, ours, theirs):
616 assert isinstance(base, Tree)
617 assert isinstance(ours, Tree)
618 assert isinstance(theirs, Tree)
620 r = self.run(['git', 'merge-recursive', base.sha1, '--', ours.sha1,
622 env = { 'GITHEAD_%s' % base.sha1: 'ancestor',
623 'GITHEAD_%s' % ours.sha1: 'current',
624 'GITHEAD_%s' % theirs.sha1: 'patched'})
626 except run.RunException, e:
628 raise MergeConflictException()
630 raise MergeException('Index/worktree dirty')
def changed_files(self):
    """Return the output lines of C{git diff-files --name-only}, run
    through this object's C{run} method."""
    command = ['git', 'diff-files', '--name-only']
    return self.run(command).output_lines()
def update_index(self, files):
    """Run C{git update-index --remove -z --stdin}, handing it
    C{files} via C{input_nulterm} and discarding the output."""
    command = ['git', 'update-index', '--remove', '-z', '--stdin']
    runner = self.run(command)
    runner.input_nulterm(files).discard_output()
637 class Branch(object):
638 """Represents a Git branch."""
639 def __init__(self, repository, name):
640 self.__repository = repository
645 raise BranchException('%s: no such branch' % name)
647 name = property(lambda self: self.__name)
648 repository = property(lambda self: self.__repository)
651 return 'refs/heads/%s' % self.__name
654 return self.__repository.refs.get(self.__ref())
def set_head(self, commit, msg):
    """Set this branch's ref to C{commit} through the repository's
    refs accessor, passing C{msg} along."""
    refs = self.__repository.refs
    refs.set(self.__ref(), commit, msg)
def set_parent_remote(self, name):
    """Store C{name} in the configuration key
    C{branch.<this branch>.remote}.

    @param name: value to record as this branch's remote"""
    # The result of config.set() was bound to a local that was never
    # read, so it is no longer kept.
    config.set('branch.%s.remote' % self.__name, name)
660 def set_parent_branch(self, name):
661 if config.get('branch.%s.remote' % self.__name):
662 # Never set merge if remote is not set to avoid
663 # possibly-erroneous lookups into 'origin'
664 config.set('branch.%s.merge' % self.__name, name)
667 def create(cls, repository, name, create_at = None):
668 """Create a new Git branch and return the corresponding
671 branch = cls(repository, name)
672 except BranchException:
675 raise BranchException('%s: branch already exists' % name)
677 cmd = ['git', 'branch']
679 cmd.append(create_at.sha1)
680 repository.run(['git', 'branch', create_at.sha1]).discard_output()
682 return cls(repository, name)