From d4678d22f1ac2fd6841b012232ce292346f53b4a Mon Sep 17 00:00:00 2001
From: Dave Hall <dave.hall@skwashd.com>
Date: Wed, 2 Jan 2013 20:29:31 +1100
Subject: [PATCH] Issue #1493714 by skwashd: Add support for gource custom log
 format

---
 drupal_log_generator.py |   52 +++++++++++++++++++++++++++++++++++++++++------
 1 file changed, 46 insertions(+), 6 deletions(-)

diff --git a/drupal_log_generator.py b/drupal_log_generator.py
index 2591c29..d75fff1 100644
--- a/drupal_log_generator.py
+++ b/drupal_log_generator.py
@@ -39,7 +39,8 @@ def get_changes(git_changes):
     changes = []
     for line in git_changes.split("\n"):
        if line.startswith(":"):
-           changes.append(line.split("\t")[1])
+           elements = line.split("\t")  # split on tabs (as before this patch) so paths containing spaces stay intact
+           changes.append([elements[1], elements[0].split()[4]])  # [path, change type]
     return changes
 
 """
@@ -82,9 +83,31 @@ def get_authors(subject, commit_id):
         "improve to the core doxygen PHPdoc",
         "after coding style fixes",
         "and lots",
+        "lot of contributors",
+        "lots of contributors",
         "lots of other people",
-        "thousands of other people",
+        "thousands of other people"
         ]
+
+    random_garbage = [  # phrases removed from each raw author string before it is parsed
+        "you can use this script   to check your code against the drupal coding style",
+        "a little bit from myself",
+        "fixed some syntax errors",
+        "with a bit of cleanup",
+        "with documentation from myself",
+        "to fix a module loading bug",
+        "based on initial suggestions from",
+        "with fixes from myself",
+        "with some modifications",
+        "no_commit_credit",
+        "patch by myself",
+        "patch my myself",
+        "in his caching presentation",
+        "with help from",
+        "with more docs from myself",
+        ]
+    replace_garbage = re.compile('|'.join(random_garbage), re.IGNORECASE)  # one case-insensitive alternation of all phrases
+
     strip_chars = ". "
     if matches is None:
         return
@@ -92,6 +115,10 @@ def get_authors(subject, commit_id):
     authors = []
     for raw_author in raw_authors:
         raw_author = raw_author.strip()
+
+        # Delete every random_garbage phrase (case-insensitive) from this author fragment
+        raw_author = replace_garbage.sub('', raw_author)
+
         # try to avoid common non-standard messages once(non recursive)
         pos1 = raw_author.find("et al")
         pos2 = raw_author.find(" and ")
@@ -142,7 +169,14 @@ def get_authors(subject, commit_id):
             authors.append(raw_author.strip(strip_chars))
     return authors
 
+def format_line(output_format, commit_hash, timestamp, filename, author, change_type):
+    """Return one output line: 'timestamp|author|type|file' for 'gource', otherwise the CSV form."""
+    if output_format == 'gource':
+        return '{0}|{1}|{2}|{3}'.format(timestamp, author.lower(), change_type, filename)
+    return '{0},{1},"{2}","{3}"'.format(commit_hash, timestamp, filename, author)
+
 def main():
+    output_format = 'default'
 
     # handling argv
     usage = "usage: %prog [options] path [target]\n"\
@@ -153,7 +187,10 @@ def main():
             help="write output to FILE")
     parser.add_option("-a", "--full-history", action="store_true", dest="full_history",
             help="Use `--all` at git-log. This option is incompatible with target argument")
+    parser.add_option("-f", "--format", action="store", dest="output_format",
+            type="choice", choices=["default", "gource"], default="default",
+            help="specify format for output - currently supported options 'default' and 'gource'")
     (options, args) = parser.parse_args()
     if options.full_history:
         if len(args) != 1:
             parser.error("Incorrect number of arguments.\n"
@@ -167,6 +204,9 @@ def main():
     if options.filename is not None:
         sys.stdout = open(options.filename, 'w')
 
+    if options.output_format is not None:  # None means -f was not given: keep 'default'
+        output_format = options.output_format
+
     path_to_repo = os.path.abspath(args[0])
     repo = git.Repo(path_to_repo)
 
@@ -177,15 +217,15 @@ def main():
         commit_id, timestamp, git_author, git_committer, subject = log_line.split("\t")
         git_changes = get_git_changes(repo, commit_id)
         changes = get_changes(git_changes)
-        for commited_file in changes:
-            print commit_id + ',' + timestamp + ',"' + commited_file + '","' + git_author + '"'
+        for commited_file, change_type in changes:
+            print format_line(output_format, commit_id, timestamp, commited_file, git_author, change_type)
             if git_author != git_committer:
-                print commit_id + ',' + timestamp + ',"' + commited_file + '","' + git_committer + '"'
+                print format_line(output_format, commit_id, timestamp, commited_file, git_committer, change_type)
             # process each author parsing subject
             authors = get_authors(subject, commit_id)
             if authors is not None:
                 for author in authors:
-                    print commit_id + ',' + timestamp + ',"' + commited_file + '","' + author + '"'
+                    print format_line(output_format, commit_id, timestamp, commited_file, author, change_type)
 
     """
     At post-process we need to take care of non-standard messages,
-- 
1.7.10.4

