[libcamera-devel,2/2] checkstyle: Add support for pre-commit hook

Message ID 20200116182603.108966-3-nicolas@ndufresne.ca
State Superseded
Headers show
Series
  • Add the ability to do pre-commit style check
Related show

Commit Message

Nicolas Dufresne Jan. 16, 2020, 6:26 p.m. UTC
From: Nicolas Dufresne <nicolas.dufresne@collabora.com>

This adds support for pre-commit hook workflow. In pre-commit hook we
check the style on the changes currently staged. Note that this patch
also includes a bit of sugar to support --amend.

Signed-off-by: Nicolas Dufresne <nicolas.dufresne@collabora.com>
---
 utils/checkstyle.py    | 55 +++++++++++++++++++++++++++++-------------
 utils/hooks/pre-commit | 16 ++++++++++++
 2 files changed, 54 insertions(+), 17 deletions(-)
 mode change 100644 => 100755 utils/checkstyle.py
 create mode 100755 utils/hooks/pre-commit

Comments

Kieran Bingham Jan. 16, 2020, 10:25 p.m. UTC | #1
Hi Nicolas,

On 16/01/2020 18:26, nicolas@ndufresne.ca wrote:
> From: Nicolas Dufresne <nicolas.dufresne@collabora.com>
> 
> This adds support for pre-commit hook workflow. In pre-commit hook we
> check the style on the changes currently staged. Note that this patch
> also includes a bit of sugar to support --amend.
> 
> Signed-off-by: Nicolas Dufresne <nicolas.dufresne@collabora.com>
> ---
>  utils/checkstyle.py    | 55 +++++++++++++++++++++++++++++-------------
>  utils/hooks/pre-commit | 16 ++++++++++++
>  2 files changed, 54 insertions(+), 17 deletions(-)
>  mode change 100644 => 100755 utils/checkstyle.py
>  create mode 100755 utils/hooks/pre-commit
> 
> diff --git a/utils/checkstyle.py b/utils/checkstyle.py
> old mode 100644
> new mode 100755
> index 7edea25..23eb3f6
> --- a/utils/checkstyle.py
> +++ b/utils/checkstyle.py
> @@ -458,11 +458,16 @@ class StripTrailingSpaceFormatter(Formatter):
>  # Style checking
>  #
>  
> -def check_file(top_level, commit, filename):
> +def check_file(top_level, commit, filename, staged):
>      # Extract the line numbers touched by the commit.
> -    diff = subprocess.run(['git', 'diff', '%s~..%s' % (commit, commit), '--',
> -                           '%s/%s' % (top_level, filename)],
> -                          stdout=subprocess.PIPE).stdout

I wonder if we could/should automate this by detecting staged changes,
and then setting staged based on that?

	"git status --porcelain | grep ^M"

Could detect if there are staged changes.

But equally it might be better to allow the caller to be explicit.


> +    if staged:
> +        diff = subprocess.run(['git', 'diff', '--staged', commit, '--',
> +                               '%s/%s' % (top_level, filename)],
> +                              stdout=subprocess.PIPE).stdout
> +    else:
> +        diff = subprocess.run(['git', 'diff', '%s~..%s' % (commit, commit), '--',
> +                               '%s/%s' % (top_level, filename)],
> +                              stdout=subprocess.PIPE).stdout
>      diff = diff.decode('utf-8').splitlines(True)
>      commit_diff = parse_diff(diff)
>  
> @@ -476,8 +481,12 @@ def check_file(top_level, commit, filename):
>  
>      # Format the file after the commit with all formatters and compute the diff
>      # between the unformatted and formatted contents.
> -    after = subprocess.run(['git', 'show', '%s:%s' % (commit, filename)],
> -                           stdout=subprocess.PIPE).stdout
> +    if staged:
> +        after = subprocess.run(['git', 'show', ':%s' % (filename)],
> +                               stdout=subprocess.PIPE).stdout
> +    else:
> +        after = subprocess.run(['git', 'show', '%s:%s' % (commit, filename)],
> +                               stdout=subprocess.PIPE).stdout
>      after = after.decode('utf-8')
>  
>      formatted = after
> @@ -521,13 +530,20 @@ def check_file(top_level, commit, filename):
>      return len(formatted_diff) + len(issues)
>  
>  
> -def check_style(top_level, commit):
> -    # Get the commit title and list of files.
> -    ret = subprocess.run(['git', 'show', '--pretty=oneline','--name-only', commit],
> -                         stdout=subprocess.PIPE)
> -    files = ret.stdout.decode('utf-8').splitlines()
> -    title = files[0]
> -    files = files[1:]
> +def check_style(top_level, commit, staged):
> +    if staged:
> +        # Get list of staged changed files
> +        ret = subprocess.run(['git', 'diff', '--staged', '--name-only', commit],
> +                             stdout=subprocess.PIPE)
> +        files = ret.stdout.decode('utf-8').splitlines()
> +        title = "Pre-commit"

This is not necessarily a "Pre-commit" (hook). It's just staged.
A user could run the tool directly to validate some staged changes
before committing...

So I'd perhaps make this
	title = "Staged"


> +    else:
> +        # Get the commit title and list of files.
> +        ret = subprocess.run(['git', 'show', '--pretty=oneline', '--name-only', commit],
> +                             stdout=subprocess.PIPE)
> +        files = ret.stdout.decode('utf-8').splitlines()
> +        title = files[0]
> +        files = files[1:]
>  
>      separator = '-' * len(title)
>      print(separator)
> @@ -541,11 +557,11 @@ def check_style(top_level, commit):
>      files = [f for f in files if len([p for p in patterns if fnmatch.fnmatch(os.path.basename(f), p)])]
>      if len(files) == 0:
>          print("Commit doesn't touch source files, skipping")
> -        return
> +        return 0
>  
>      issues = 0
>      for f in files:
> -        issues += check_file(top_level, commit, f)
> +        issues += check_file(top_level, commit, f, staged)
>  
>      if issues == 0:
>          print("No style issue detected")
> @@ -554,6 +570,8 @@ def check_style(top_level, commit):
>          print("%u potential style %s detected, please review" % \
>                  (issues, 'issue' if issues == 1 else 'issues'))
>  
> +    return issues
> +
>  
>  def extract_revlist(revs):
>      """Extract a list of commits on which to operate from a revision or revision
> @@ -595,6 +613,8 @@ def main(argv):
>      parser = argparse.ArgumentParser()
>      parser.add_argument('--formatter', '-f', type=str, choices=['astyle', 'clang-format'],
>                          help='Code formatter. Default to clang-format if not specified.')
> +    parser.add_argument('--staged', '-s', action='store_true',
> +                        help='Looks at the staged changes, used for pre-commit, defaults to False')
>      parser.add_argument('revision_range', type=str, default='HEAD', nargs='?',
>                          help='Revision range (as defined by git rev-parse). Defaults to HEAD if not specified.')
>      args = parser.parse_args(argv[1:])
> @@ -632,11 +652,12 @@ def main(argv):
>  
>      revlist = extract_revlist(args.revision_range)
>  
> +    issues = 0
>      for commit in revlist:
> -        check_style(top_level, commit)
> +        issues += check_style(top_level, commit, args.staged)
>          print('')
>  
> -    return 0
> +    return issues

I'd be tempted to split the
  "checkstyle: Report issue count as a failure"
and
  "checkstyle: Support validating staged git state"

to two patches, but perhaps that's not necessary unless you see benefit
in splitting.


>  
>  
>  if __name__ == '__main__':
> diff --git a/utils/hooks/pre-commit b/utils/hooks/pre-commit
> new file mode 100755
> index 0000000..57d23ef
> --- /dev/null
> +++ b/utils/hooks/pre-commit

Though I really think this deserves it's own commit.

 "utils/hooks: Provide a pre-commit checkstyle hook"



> @@ -0,0 +1,16 @@
> +#!/bin/sh
> +
> +# Execute the checkstyle script before committing any code, failing the commit
> +# if needed. With this workflow, false positive can be ignored using:
> +#   git commit -n
> +#
> +# To utilise this hook, install this file with:
> +#   cp utils/hooks/pre-commit .git/hooks/pre-commit

One of the things I had toyed with is installing the hook by setting the
git hook directory to utils/hooks/ :

 git config core.hooksPath utils/hooks

But doing so now would invoke both the pre-commit and the post-commit hook.

I think it's good to provide the option of how someone might install
their hook

> +
> +commit=
> +if ps -ocommand= -p $PPID | grep -- "--amend"
> +then
> +   commit="HEAD~"

Is this really needed?

Edit: Yes, I've just played around with it - and I see what's happening.
Good catch to find that something extra was required here.


> +fi
> +
> +./utils/checkstyle.py --staged $commit


I wonder if the checkstyle.py could detect if we had staged changes if
we could fall back to needing only a single variant of the hook which
the user could choose to install as either a pre-commit or a post-commit
hook.

But perhaps it's more straight-forward to have two hooks to keep it
easier to support any future differences too.
Laurent Pinchart Jan. 16, 2020, 10:31 p.m. UTC | #2
Hi Kieran,

On Thu, Jan 16, 2020 at 10:25:31PM +0000, Kieran Bingham wrote:
> On 16/01/2020 18:26, nicolas@ndufresne.ca wrote:
> > From: Nicolas Dufresne <nicolas.dufresne@collabora.com>
> > 
> > This adds support for pre-commit hook workflow. In pre-commit hook we
> > check the style on the changes currently staged. Note that this patch
> > also includes a bit of sugar to support --amend.
> > 
> > Signed-off-by: Nicolas Dufresne <nicolas.dufresne@collabora.com>
> > ---
> >  utils/checkstyle.py    | 55 +++++++++++++++++++++++++++++-------------
> >  utils/hooks/pre-commit | 16 ++++++++++++
> >  2 files changed, 54 insertions(+), 17 deletions(-)
> >  mode change 100644 => 100755 utils/checkstyle.py
> >  create mode 100755 utils/hooks/pre-commit
> > 
> > diff --git a/utils/checkstyle.py b/utils/checkstyle.py
> > old mode 100644
> > new mode 100755
> > index 7edea25..23eb3f6
> > --- a/utils/checkstyle.py
> > +++ b/utils/checkstyle.py
> > @@ -458,11 +458,16 @@ class StripTrailingSpaceFormatter(Formatter):
> >  # Style checking
> >  #
> >  
> > -def check_file(top_level, commit, filename):
> > +def check_file(top_level, commit, filename, staged):
> >      # Extract the line numbers touched by the commit.
> > -    diff = subprocess.run(['git', 'diff', '%s~..%s' % (commit, commit), '--',
> > -                           '%s/%s' % (top_level, filename)],
> > -                          stdout=subprocess.PIPE).stdout
> 
> I wonder if we could/should automate this by detecting staged changes,
> and then setting staged based on that?
> 
> 	"git status --porcelain | grep ^M"
> 
> Could detect if there are staged changes.
> 
> But equally it might be better to allow the caller to be explicit.

I've been wondering about this too, I have a slight preference for
auto-detection, but that's not a hard requirement if too difficult to
implement or impractical to use.

> > +    if staged:
> > +        diff = subprocess.run(['git', 'diff', '--staged', commit, '--',
> > +                               '%s/%s' % (top_level, filename)],
> > +                              stdout=subprocess.PIPE).stdout
> > +    else:
> > +        diff = subprocess.run(['git', 'diff', '%s~..%s' % (commit, commit), '--',
> > +                               '%s/%s' % (top_level, filename)],
> > +                              stdout=subprocess.PIPE).stdout
> >      diff = diff.decode('utf-8').splitlines(True)
> >      commit_diff = parse_diff(diff)
> >  
> > @@ -476,8 +481,12 @@ def check_file(top_level, commit, filename):
> >  
> >      # Format the file after the commit with all formatters and compute the diff
> >      # between the unformatted and formatted contents.
> > -    after = subprocess.run(['git', 'show', '%s:%s' % (commit, filename)],
> > -                           stdout=subprocess.PIPE).stdout
> > +    if staged:
> > +        after = subprocess.run(['git', 'show', ':%s' % (filename)],
> > +                               stdout=subprocess.PIPE).stdout
> > +    else:
> > +        after = subprocess.run(['git', 'show', '%s:%s' % (commit, filename)],
> > +                               stdout=subprocess.PIPE).stdout
> >      after = after.decode('utf-8')
> >  
> >      formatted = after
> > @@ -521,13 +530,20 @@ def check_file(top_level, commit, filename):
> >      return len(formatted_diff) + len(issues)
> >  
> >  
> > -def check_style(top_level, commit):
> > -    # Get the commit title and list of files.
> > -    ret = subprocess.run(['git', 'show', '--pretty=oneline','--name-only', commit],
> > -                         stdout=subprocess.PIPE)
> > -    files = ret.stdout.decode('utf-8').splitlines()
> > -    title = files[0]
> > -    files = files[1:]
> > +def check_style(top_level, commit, staged):
> > +    if staged:
> > +        # Get list of staged changed files
> > +        ret = subprocess.run(['git', 'diff', '--staged', '--name-only', commit],
> > +                             stdout=subprocess.PIPE)
> > +        files = ret.stdout.decode('utf-8').splitlines()
> > +        title = "Pre-commit"
> 
> This is not necessarily a "Pre-commit" (hook). It's just staged.
> A user could run the tool directly to validate some staged changes
> before committing...
> 
> So I'd perhaps make this
> 	title = "Staged"

Or maybe "Staged changes" to be slightly more explicit ?

> > +    else:
> > +        # Get the commit title and list of files.
> > +        ret = subprocess.run(['git', 'show', '--pretty=oneline', '--name-only', commit],
> > +                             stdout=subprocess.PIPE)
> > +        files = ret.stdout.decode('utf-8').splitlines()
> > +        title = files[0]
> > +        files = files[1:]
> >  
> >      separator = '-' * len(title)
> >      print(separator)
> > @@ -541,11 +557,11 @@ def check_style(top_level, commit):
> >      files = [f for f in files if len([p for p in patterns if fnmatch.fnmatch(os.path.basename(f), p)])]
> >      if len(files) == 0:
> >          print("Commit doesn't touch source files, skipping")
> > -        return
> > +        return 0
> >  
> >      issues = 0
> >      for f in files:
> > -        issues += check_file(top_level, commit, f)
> > +        issues += check_file(top_level, commit, f, staged)
> >  
> >      if issues == 0:
> >          print("No style issue detected")
> > @@ -554,6 +570,8 @@ def check_style(top_level, commit):
> >          print("%u potential style %s detected, please review" % \
> >                  (issues, 'issue' if issues == 1 else 'issues'))
> >  
> > +    return issues
> > +
> >  
> >  def extract_revlist(revs):
> >      """Extract a list of commits on which to operate from a revision or revision
> > @@ -595,6 +613,8 @@ def main(argv):
> >      parser = argparse.ArgumentParser()
> >      parser.add_argument('--formatter', '-f', type=str, choices=['astyle', 'clang-format'],
> >                          help='Code formatter. Default to clang-format if not specified.')
> > +    parser.add_argument('--staged', '-s', action='store_true',
> > +                        help='Looks at the staged changes, used for pre-commit, defaults to False')
> >      parser.add_argument('revision_range', type=str, default='HEAD', nargs='?',
> >                          help='Revision range (as defined by git rev-parse). Defaults to HEAD if not specified.')
> >      args = parser.parse_args(argv[1:])
> > @@ -632,11 +652,12 @@ def main(argv):
> >  
> >      revlist = extract_revlist(args.revision_range)
> >  
> > +    issues = 0
> >      for commit in revlist:
> > -        check_style(top_level, commit)
> > +        issues += check_style(top_level, commit, args.staged)
> >          print('')
> >  
> > -    return 0
> > +    return issues
> 
> I'd be tempted to split the
>   "checkstyle: Report issue count as a failure"
> and
>   "checkstyle: Support validating staged git state"
> 
> to two patches, but perhaps that's not necessary unless you see benefit
> in splitting.

I don't think there's a need to resubmit just for this, but if a v2 is
needed, it may be nice to split the changes indeed, to explain why
changing the return value is needed.

According to the exit man page,

RATIONALE
       As explained in other sections, certain exit status values have been reserved for special uses and should be used by applications only for those purposes:

        126    A file to be executed was found, but it was not an executable utility.

        127    A utility to be executed was not found.

       >128    A command was interrupted by a signal.

And according to exit(),

The value status & 0377 is returned to the parent process as the process's exit status, and can be collected using one of the wait(2) family of calls.

In the unfortunate event that a multiple of 256 issues would be found,
the caller would think everything went fine. Should we thus return 0 on
success and 1 if any issue was found ?

> >
> >
> >  if __name__ == '__main__':
> > diff --git a/utils/hooks/pre-commit b/utils/hooks/pre-commit
> > new file mode 100755
> > index 0000000..57d23ef
> > --- /dev/null
> > +++ b/utils/hooks/pre-commit
> 
> Though I really think this deserves it's own commit.
> 
>  "utils/hooks: Provide a pre-commit checkstyle hook"
> 
> > @@ -0,0 +1,16 @@
> > +#!/bin/sh
> > +
> > +# Execute the checkstyle script before committing any code, failing the commit
> > +# if needed. With this workflow, false positive can be ignored using:
> > +#   git commit -n
> > +#
> > +# To utilise this hook, install this file with:
> > +#   cp utils/hooks/pre-commit .git/hooks/pre-commit
> 
> One of the things I had toyed with is installing the hook by setting the
> git hook directory to utils/hooks/ :
> 
>  git config core.hooksPath utils/hooks
> 
> But doing so now would invoke both the pre-commit and the post-commit hook.
> 
> I think it's good to provide the option of how someone might install
> their hook
> 
> > +
> > +commit=
> > +if ps -ocommand= -p $PPID | grep -- "--amend"
> > +then
> > +   commit="HEAD~"
> 
> Is this really needed?
> 
> Edit: Yes, I've just played around with it - and I see what's happening.
> Good catch to find that something extra was required here.
> 
> > +fi
> > +
> > +./utils/checkstyle.py --staged $commit
> 
> I wonder if the checkstyle.py could detect if we had staged changes if
> we could fall back to needing only a single variant of the hook which
> the user could choose to install as either a pre-commit or a post-commit
> hook.
> 
> But perhaps it's more straight-forward to have two hooks to keep it
> easier to support any future differences too.

I'm fine either way.
Nicolas Dufresne Jan. 17, 2020, 1:23 a.m. UTC | #3
Le vendredi 17 janvier 2020 à 00:31 +0200, Laurent Pinchart a écrit :
> Hi Kieran,
> 
> On Thu, Jan 16, 2020 at 10:25:31PM +0000, Kieran Bingham wrote:
> > On 16/01/2020 18:26, nicolas@ndufresne.ca wrote:
> > > From: Nicolas Dufresne <nicolas.dufresne@collabora.com>
> > > 
> > > This adds support for pre-commit hook workflow. In pre-commit hook we
> > > check the style on the changes currently staged. Note that this patch
> > > also includes a bit of sugar to support --amend.
> > > 
> > > Signed-off-by: Nicolas Dufresne <nicolas.dufresne@collabora.com>
> > > ---
> > >  utils/checkstyle.py    | 55 +++++++++++++++++++++++++++++-------------
> > >  utils/hooks/pre-commit | 16 ++++++++++++
> > >  2 files changed, 54 insertions(+), 17 deletions(-)
> > >  mode change 100644 => 100755 utils/checkstyle.py
> > >  create mode 100755 utils/hooks/pre-commit
> > > 
> > > diff --git a/utils/checkstyle.py b/utils/checkstyle.py
> > > old mode 100644
> > > new mode 100755
> > > index 7edea25..23eb3f6
> > > --- a/utils/checkstyle.py
> > > +++ b/utils/checkstyle.py
> > > @@ -458,11 +458,16 @@ class StripTrailingSpaceFormatter(Formatter):
> > >  # Style checking
> > >  #
> > >  
> > > -def check_file(top_level, commit, filename):
> > > +def check_file(top_level, commit, filename, staged):
> > >      # Extract the line numbers touched by the commit.
> > > -    diff = subprocess.run(['git', 'diff', '%s~..%s' % (commit, commit), '--',
> > > -                           '%s/%s' % (top_level, filename)],
> > > -                          stdout=subprocess.PIPE).stdout
> > 
> > I wonder if we could/should automate this by detecting staged changes,
> > and then setting staged based on that?
> > 
> > 	"git status --porcelain | grep ^M"
> > 
> > Could detect if there are staged changes.
> > 
> > But equally it might be better to allow the caller to be explicit.
> 
> I've been wondering about this too, I have a slight preference for
> auto-detection, but that's not a hard requirement if too difficult to
> implement or impractical to use.

I don't think it hard, but I wasn't sure it was a good idea. One thing
that I've been a little slowpy is with the meaning of commit here (of
the rev list in fact). Before I added amend support, I was simply
ignoring that (no meaning). But then I recycled it to support the
commit --amend case, where you want to included the staged changes and
the previous commit.

What I found worked to get that information was to only pass one hash,
which is the commit before the last commit (HEAD~). Anyway, I think I
should fix this, and  so '%s~' % commit instead if I want to maintained
the behaviour. But by doing that, there is no longer anything to
describe that when I commit --amend, I want the combined report, not
just what was staged. And that I also don't want the previous commit to
be checked independently from the staged, because that would produce
that warning I'm trying to fix.

So my conclusion, is that to disambiguate all this, I should in fact be
even more explicit, and also introduce --amend. A better implementation
would be to find a way to integrated these special revisions into the
revlist, this way (not sure if there is a use case) but you could
request a report for specific commits, the staging changes and the
amended changes in one call.

> 
> > > +    if staged:
> > > +        diff = subprocess.run(['git', 'diff', '--staged', commit, '--',
> > > +                               '%s/%s' % (top_level, filename)],
> > > +                              stdout=subprocess.PIPE).stdout

> > > +    else:
> > > +        diff = subprocess.run(['git', 'diff', '%s~..%s' % (commit, commit), '--',
> > > +                               '%s/%s' % (top_level, filename)],
> > > +                              stdout=subprocess.PIPE).stdout
> > >      diff = diff.decode('utf-8').splitlines(True)
> > >      commit_diff = parse_diff(diff)
> > >  
> > > @@ -476,8 +481,12 @@ def check_file(top_level, commit, filename):
> > >  
> > >      # Format the file after the commit with all formatters and compute the diff
> > >      # between the unformatted and formatted contents.
> > > -    after = subprocess.run(['git', 'show', '%s:%s' % (commit, filename)],
> > > -                           stdout=subprocess.PIPE).stdout
> > > +    if staged:
> > > +        after = subprocess.run(['git', 'show', ':%s' % (filename)],
> > > +                               stdout=subprocess.PIPE).stdout
> > > +    else:
> > > +        after = subprocess.run(['git', 'show', '%s:%s' % (commit, filename)],
> > > +                               stdout=subprocess.PIPE).stdout
> > >      after = after.decode('utf-8')
> > >  
> > >      formatted = after
> > > @@ -521,13 +530,20 @@ def check_file(top_level, commit, filename):
> > >      return len(formatted_diff) + len(issues)
> > >  
> > >  
> > > -def check_style(top_level, commit):
> > > -    # Get the commit title and list of files.
> > > -    ret = subprocess.run(['git', 'show', '--pretty=oneline','--name-only', commit],
> > > -                         stdout=subprocess.PIPE)
> > > -    files = ret.stdout.decode('utf-8').splitlines()
> > > -    title = files[0]
> > > -    files = files[1:]
> > > +def check_style(top_level, commit, staged):
> > > +    if staged:
> > > +        # Get list of staged changed files
> > > +        ret = subprocess.run(['git', 'diff', '--staged', '--name-only', commit],
> > > +                             stdout=subprocess.PIPE)
> > > +        files = ret.stdout.decode('utf-8').splitlines()
> > > +        title = "Pre-commit"
> > 
> > This is not necessarily a "Pre-commit" (hook). It's just staged.
> > A user could run the tool directly to validate some staged changes
> > before committing...
> > 
> > So I'd perhaps make this
> > 	title = "Staged"
> 
> Or maybe "Staged changes" to be slightly more explicit ?

It started "Staged", then I splitted it between amend and staged, and
for some reason ended up like this. I think I'll split it again, and
use "Staged changes" or "Amended changes".

> 
> > > +    else:
> > > +        # Get the commit title and list of files.
> > > +        ret = subprocess.run(['git', 'show', '--pretty=oneline', '--name-only', commit],
> > > +                             stdout=subprocess.PIPE)
> > > +        files = ret.stdout.decode('utf-8').splitlines()
> > > +        title = files[0]
> > > +        files = files[1:]
> > >  
> > >      separator = '-' * len(title)
> > >      print(separator)
> > > @@ -541,11 +557,11 @@ def check_style(top_level, commit):
> > >      files = [f for f in files if len([p for p in patterns if fnmatch.fnmatch(os.path.basename(f), p)])]
> > >      if len(files) == 0:
> > >          print("Commit doesn't touch source files, skipping")
> > > -        return
> > > +        return 0
> > >  
> > >      issues = 0
> > >      for f in files:
> > > -        issues += check_file(top_level, commit, f)
> > > +        issues += check_file(top_level, commit, f, staged)
> > >  
> > >      if issues == 0:
> > >          print("No style issue detected")
> > > @@ -554,6 +570,8 @@ def check_style(top_level, commit):
> > >          print("%u potential style %s detected, please review" % \
> > >                  (issues, 'issue' if issues == 1 else 'issues'))
> > >  
> > > +    return issues
> > > +
> > >  
> > >  def extract_revlist(revs):
> > >      """Extract a list of commits on which to operate from a revision or revision
> > > @@ -595,6 +613,8 @@ def main(argv):
> > >      parser = argparse.ArgumentParser()
> > >      parser.add_argument('--formatter', '-f', type=str, choices=['astyle', 'clang-format'],
> > >                          help='Code formatter. Default to clang-format if not specified.')
> > > +    parser.add_argument('--staged', '-s', action='store_true',
> > > +                        help='Looks at the staged changes, used for pre-commit, defaults to False')
> > >      parser.add_argument('revision_range', type=str, default='HEAD', nargs='?',
> > >                          help='Revision range (as defined by git rev-parse). Defaults to HEAD if not specified.')
> > >      args = parser.parse_args(argv[1:])
> > > @@ -632,11 +652,12 @@ def main(argv):
> > >  
> > >      revlist = extract_revlist(args.revision_range)
> > >  
> > > +    issues = 0
> > >      for commit in revlist:
> > > -        check_style(top_level, commit)
> > > +        issues += check_style(top_level, commit, args.staged)
> > >          print('')
> > >  
> > > -    return 0
> > > +    return issues
> > 
> > I'd be tempted to split the
> >   "checkstyle: Report issue count as a failure"
> > and
> >   "checkstyle: Support validating staged git state"
> > 
> > to two patches, but perhaps that's not necessary unless you see benefit
> > in splitting.
> 
> I don't think there's a need to resubmit just for this, but if a v2 is
> needed, it may be nice to split the changes indeed, to explain why
> changing the return value is needed.

I think a v2 is needed, so I'll do.

> 
> According to the exit man page,
> 
> RATIONALE
>        As explained in other sections, certain exit status values have been reserved for special uses and should be used by applications only for those purposes:
> 
>         126    A file to be executed was found, but it was not an executable utility.
> 
>         127    A utility to be executed was not found.
> 
>        >128    A command was interrupted by a signal.
> 
> And according to exit(),
> 
> The value status & 0377 is returned to the parent process as the process's exit status, and can be collected using one of the wait(2) family of calls.
> 
> In the unfortunate event that a multiple of 256 issues would be found,
> the caller would think everything went fine. Should we thus return 0 on
> success and 1 if any issue was found ?

Agreed, will turn the number of issues into 0 and 1.

> 
> > > 
> > >  if __name__ == '__main__':
> > > diff --git a/utils/hooks/pre-commit b/utils/hooks/pre-commit
> > > new file mode 100755
> > > index 0000000..57d23ef
> > > --- /dev/null
> > > +++ b/utils/hooks/pre-commit
> > 
> > Though I really think this deserves it's own commit.
> > 
> >  "utils/hooks: Provide a pre-commit checkstyle hook"
> > 
> > > @@ -0,0 +1,16 @@
> > > +#!/bin/sh
> > > +
> > > +# Execute the checkstyle script before committing any code, failing the commit
> > > +# if needed. With this workflow, false positive can be ignored using:
> > > +#   git commit -n
> > > +#
> > > +# To utilise this hook, install this file with:
> > > +#   cp utils/hooks/pre-commit .git/hooks/pre-commit
> > 
> > One of the things I had toyed with is installing the hook by setting the
> > git hook directory to utils/hooks/ :
> > 
> >  git config core.hooksPath utils/hooks
> > 
> > But doing so now would invoke both the pre-commit and the post-commit hook.
> > 
> > I think it's good to provide the option of how someone might install
> > their hook
> > 
> > > +
> > > +commit=
> > > +if ps -ocommand= -p $PPID | grep -- "--amend"
> > > +then
> > > +   commit="HEAD~"
> > 
> > Is this really needed?
> > 
> > Edit: Yes, I've just played around with it - and I see what's happening.
> > Good catch to find that something extra was required here.
> > 
> > > +fi
> > > +
> > > +./utils/checkstyle.py --staged $commit
> > 
> > I wonder if the checkstyle.py could detect if we had staged changes if
> > we could fall back to needing only a single variant of the hook which
> > the user could choose to install as either a pre-commit or a post-commit
> > hook.
> > 
> > But perhaps it's more straight-forward to have two hooks to keep it
> > easier to support any future differences too.
> 
> I'm fine either way.

I think having two files in hook/ directory will be less confusing 
even if they are identical in the end. But I don't think the auto
detection is a good idea, it's makes the CLI of the tool ambiguous at
least.
Laurent Pinchart Jan. 17, 2020, 7:46 a.m. UTC | #4
Hi Nicolas,

On Thu, Jan 16, 2020 at 08:23:36PM -0500, Nicolas Dufresne wrote:
> Le vendredi 17 janvier 2020 à 00:31 +0200, Laurent Pinchart a écrit :
> > On Thu, Jan 16, 2020 at 10:25:31PM +0000, Kieran Bingham wrote:
> >> On 16/01/2020 18:26, nicolas@ndufresne.ca wrote:
> >>> From: Nicolas Dufresne <nicolas.dufresne@collabora.com>
> >>> 
> >>> This adds support for pre-commit hook workflow. In pre-commit hook we
> >>> check the style on the changes currently staged. Note that this patch
> >>> also includes a bit of sugar to support --amend.
> >>> 
> >>> Signed-off-by: Nicolas Dufresne <nicolas.dufresne@collabora.com>
> >>> ---
> >>>  utils/checkstyle.py    | 55 +++++++++++++++++++++++++++++-------------
> >>>  utils/hooks/pre-commit | 16 ++++++++++++
> >>>  2 files changed, 54 insertions(+), 17 deletions(-)
> >>>  mode change 100644 => 100755 utils/checkstyle.py
> >>>  create mode 100755 utils/hooks/pre-commit
> >>> 
> >>> diff --git a/utils/checkstyle.py b/utils/checkstyle.py
> >>> old mode 100644
> >>> new mode 100755
> >>> index 7edea25..23eb3f6
> >>> --- a/utils/checkstyle.py
> >>> +++ b/utils/checkstyle.py
> >>> @@ -458,11 +458,16 @@ class StripTrailingSpaceFormatter(Formatter):
> >>>  # Style checking
> >>>  #
> >>>  
> >>> -def check_file(top_level, commit, filename):
> >>> +def check_file(top_level, commit, filename, staged):
> >>>      # Extract the line numbers touched by the commit.
> >>> -    diff = subprocess.run(['git', 'diff', '%s~..%s' % (commit, commit), '--',
> >>> -                           '%s/%s' % (top_level, filename)],
> >>> -                          stdout=subprocess.PIPE).stdout
> >> 
> >> I wonder if we could/should automate this by detecting staged changes,
> >> and then setting staged based on that?
> >> 
> >> 	"git status --porcelain | grep ^M"
> >> 
> >> Could detect if there are staged changes.
> >> 
> >> But equally it might be better to allow the caller to be explicit.
> > 
> > I've been wondering about this too, I have a slight preference for
> > auto-detection, but that's not a hard requirement if too difficult to
> > implement or impractical to use.
> 
> I don't think it hard, but I wasn't sure it was a good idea. One thing
> that I've been a little slowpy is with the meaning of commit here (of
> the rev list in fact). Before I added amend support, I was simply
> ignoring that (no meaning). But then I recycled it to support the
> commit --amend case, where you want to included the staged changes and
> the previous commit.
> 
> What I found worked to get that information was to only pass one hash,
> which is the commit before the last commit (HEAD~). Anyway, I think I
> should fix this, and  so '%s~' % commit instead if I want to maintained
> the behaviour. But by doing that, there is no longer anything to
> describe that when I commit --amend, I want the combined report, not
> just what was staged. And that I also don't want the previous commit to
> be checked independently from the staged, because that would produce
> that warning I'm trying to fix.
> 
> So my conclusion, is that to disambiguate all this, I should in fact be
> even more explicit, and also introduce --amend. A better implementation
> would be to find a way to integrated these special revisions into the
> revlist, this way (not sure if there is a use case) but you could
> request a report for specific commits, the staging changes and the
> amended changes in one call.

That would be neat indeed. From a quick look at git-rev-parse there's no
revision syntax to express this that I could find. We could extend the
revision syntax, but that may be dangerous.

If we instead go for explicit arguments, --staged or --cached would be
fine with me, but I'd rather use --worktree (or something similar) than
--amend to reflect that the revision corresponds to the working tree.

> >>> +    if staged:
> >>> +        diff = subprocess.run(['git', 'diff', '--staged', commit, '--',
> >>> +                               '%s/%s' % (top_level, filename)],
> >>> +                              stdout=subprocess.PIPE).stdout
> >>> +    else:
> >>> +        diff = subprocess.run(['git', 'diff', '%s~..%s' % (commit, commit), '--',
> >>> +                               '%s/%s' % (top_level, filename)],
> >>> +                              stdout=subprocess.PIPE).stdout
> >>>      diff = diff.decode('utf-8').splitlines(True)
> >>>      commit_diff = parse_diff(diff)
> >>>  
> >>> @@ -476,8 +481,12 @@ def check_file(top_level, commit, filename):
> >>>  
> >>>      # Format the file after the commit with all formatters and compute the diff
> >>>      # between the unformatted and formatted contents.
> >>> -    after = subprocess.run(['git', 'show', '%s:%s' % (commit, filename)],
> >>> -                           stdout=subprocess.PIPE).stdout
> >>> +    if staged:
> >>> +        after = subprocess.run(['git', 'show', ':%s' % (filename)],
> >>> +                               stdout=subprocess.PIPE).stdout
> >>> +    else:
> >>> +        after = subprocess.run(['git', 'show', '%s:%s' % (commit, filename)],
> >>> +                               stdout=subprocess.PIPE).stdout
> >>>      after = after.decode('utf-8')
> >>>  
> >>>      formatted = after
> >>> @@ -521,13 +530,20 @@ def check_file(top_level, commit, filename):
> >>>      return len(formatted_diff) + len(issues)
> >>>  
> >>>  
> >>> -def check_style(top_level, commit):
> >>> -    # Get the commit title and list of files.
> >>> -    ret = subprocess.run(['git', 'show', '--pretty=oneline','--name-only', commit],
> >>> -                         stdout=subprocess.PIPE)
> >>> -    files = ret.stdout.decode('utf-8').splitlines()
> >>> -    title = files[0]
> >>> -    files = files[1:]
> >>> +def check_style(top_level, commit, staged):
> >>> +    if staged:
> >>> +        # Get list of staged changed files
> >>> +        ret = subprocess.run(['git', 'diff', '--staged', '--name-only', commit],
> >>> +                             stdout=subprocess.PIPE)
> >>> +        files = ret.stdout.decode('utf-8').splitlines()
> >>> +        title = "Pre-commit"
> >> 
> >> This is not necessarily a "Pre-commit" (hook). It's just staged.
> >> A user could run the tool directly to validate some staged changes
> >> before committing...
> >> 
> >> So I'd perhaps make this
> >> 	title = "Staged"
> > 
> > Or maybe "Staged changes" to be slightly more explicit ?
> 
> It started "Staged", then I splitted it between amend and staged, and
> for some reason ended up like this. I think I'll split it again, and
> use "Staged changes" or "Amended changes".

Maybe "Work tree changes" instead of "Amended changes" ?

> >>> +    else:
> >>> +        # Get the commit title and list of files.
> >>> +        ret = subprocess.run(['git', 'show', '--pretty=oneline', '--name-only', commit],
> >>> +                             stdout=subprocess.PIPE)
> >>> +        files = ret.stdout.decode('utf-8').splitlines()
> >>> +        title = files[0]
> >>> +        files = files[1:]
> >>>  
> >>>      separator = '-' * len(title)
> >>>      print(separator)
> >>> @@ -541,11 +557,11 @@ def check_style(top_level, commit):
> >>>      files = [f for f in files if len([p for p in patterns if fnmatch.fnmatch(os.path.basename(f), p)])]
> >>>      if len(files) == 0:
> >>>          print("Commit doesn't touch source files, skipping")
> >>> -        return
> >>> +        return 0
> >>>  
> >>>      issues = 0
> >>>      for f in files:
> >>> -        issues += check_file(top_level, commit, f)
> >>> +        issues += check_file(top_level, commit, f, staged)
> >>>  
> >>>      if issues == 0:
> >>>          print("No style issue detected")
> >>> @@ -554,6 +570,8 @@ def check_style(top_level, commit):
> >>>          print("%u potential style %s detected, please review" % \
> >>>                  (issues, 'issue' if issues == 1 else 'issues'))
> >>>  
> >>> +    return issues
> >>> +
> >>>  
> >>>  def extract_revlist(revs):
> >>>      """Extract a list of commits on which to operate from a revision or revision
> >>> @@ -595,6 +613,8 @@ def main(argv):
> >>>      parser = argparse.ArgumentParser()
> >>>      parser.add_argument('--formatter', '-f', type=str, choices=['astyle', 'clang-format'],
> >>>                          help='Code formatter. Default to clang-format if not specified.')
> >>> +    parser.add_argument('--staged', '-s', action='store_true',
> >>> +                        help='Looks at the staged changes, used for pre-commit, defaults to False')
> >>>      parser.add_argument('revision_range', type=str, default='HEAD', nargs='?',
> >>>                          help='Revision range (as defined by git rev-parse). Defaults to HEAD if not specified.')
> >>>      args = parser.parse_args(argv[1:])
> >>> @@ -632,11 +652,12 @@ def main(argv):
> >>>  
> >>>      revlist = extract_revlist(args.revision_range)
> >>>  
> >>> +    issues = 0
> >>>      for commit in revlist:
> >>> -        check_style(top_level, commit)
> >>> +        issues += check_style(top_level, commit, args.staged)
> >>>          print('')
> >>>  
> >>> -    return 0
> >>> +    return issues
> >> 
> >> I'd be tempted to split the
> >>   "checkstyle: Report issue count as a failure"
> >> and
> >>   "checkstyle: Support validating staged git state"
> >> 
> >> to two patches, but perhaps that's not necessary unless you see benefit
> >> in splitting.
> > 
> > I don't think there's a need to resubmit just for this, but if a v2 is
> > needed, it may be nice to split the changes indeed, to explain why
> > changing the return value is needed.
> 
> I think a v2 is needed, so I'll do.
> 
> > According to the exit man page,
> > 
> > RATIONALE
> >        As explained in other sections, certain exit status values have been reserved for special uses and should be used by applications only for those purposes:
> > 
> >         126    A file to be executed was found, but it was not an executable utility.
> > 
> >         127    A utility to be executed was not found.
> > 
> >        >128    A command was interrupted by a signal.
> > 
> > And according to exit(),
> > 
> > The value status & 0377 is returned to the parent process as the process's exit status, and can be collected using one of the wait(2) family of calls.
> > 
> > In the unfortunate event that a multiple of 256 issues would be found,
> > the caller would think everything went fine. Should we thus return 0 on
> > success and 1 if any issue was found ?
> 
> Agreed, will turn the number of issues into 0 and 1.
> 
> >>> 
> >>>  if __name__ == '__main__':
> >>> diff --git a/utils/hooks/pre-commit b/utils/hooks/pre-commit
> >>> new file mode 100755
> >>> index 0000000..57d23ef
> >>> --- /dev/null
> >>> +++ b/utils/hooks/pre-commit
> >> 
> >> Though I really think this deserves it's own commit.
> >> 
> >>  "utils/hooks: Provide a pre-commit checkstyle hook"
> >> 
> >>> @@ -0,0 +1,16 @@
> >>> +#!/bin/sh
> >>> +
> >>> +# Execute the checkstyle script before committing any code, failing the commit
> >>> +# if needed. With this workflow, false positive can be ignored using:
> >>> +#   git commit -n
> >>> +#
> >>> +# To utilise this hook, install this file with:
> >>> +#   cp utils/hooks/pre-commit .git/hooks/pre-commit
> >> 
> >> One of the things I had toyed with is installing the hook by setting the
> >> git hook directory to utils/hooks/ :
> >> 
> >>  git config core.hooksPath utils/hooks
> >> 
> >> But doing so now would invoke both the pre-commit and the post-commit hook.
> >> 
> >> I think it's good to provide the option of how someone might install
> >> their hook
> >> 
> >>> +
> >>> +commit=
> >>> +if ps -ocommand= -p $PPID | grep -- "--amend"
> >>> +then
> >>> +   commit="HEAD~"
> >> 
> >> Is this really needed?
> >> 
> >> Edit: Yes, I've just played around with it - and I see what's happening.
> >> Good catch to find that something extra was required here.
> >> 
> >>> +fi
> >>> +
> >>> +./utils/checkstyle.py --staged $commit
> >> 
> >> I wonder if the checkstyle.py could detect if we had staged changes if
> >> we could fall back to needing only a single variant of the hook which
> >> the user could choose to install as either a pre-commit or a post-commit
> >> hook.
> >> 
> >> But perhaps it's more straight-forward to have two hooks to keep it
> >> easier to support any future differences too.
> > 
> > I'm fine either way.
> 
> I think having two files in hook/ directory will be less confusing 
> even if they are identical in the end. But I don't think the auto
> detection is a good idea, it's makes the CLI of the tool ambiguous at
> least.
Nicolas Dufresne Jan. 17, 2020, 1:50 p.m. UTC | #5
Le vendredi 17 janvier 2020 à 09:46 +0200, Laurent Pinchart a écrit :
> Hi Nicolas,
> 
> On Thu, Jan 16, 2020 at 08:23:36PM -0500, Nicolas Dufresne wrote:
> > Le vendredi 17 janvier 2020 à 00:31 +0200, Laurent Pinchart a écrit :
> > > On Thu, Jan 16, 2020 at 10:25:31PM +0000, Kieran Bingham wrote:
> > > > On 16/01/2020 18:26, nicolas@ndufresne.ca wrote:
> > > > > From: Nicolas Dufresne <nicolas.dufresne@collabora.com>
> > > > > 
> > > > > This adds support for pre-commit hook workflow. In pre-commit hook we
> > > > > check the style on the changes currently staged. Note that this patch
> > > > > also includes a bit of sugar to support --amend.
> > > > > 
> > > > > Signed-off-by: Nicolas Dufresne <nicolas.dufresne@collabora.com>
> > > > > ---
> > > > >  utils/checkstyle.py    | 55 +++++++++++++++++++++++++++++-------------
> > > > >  utils/hooks/pre-commit | 16 ++++++++++++
> > > > >  2 files changed, 54 insertions(+), 17 deletions(-)
> > > > >  mode change 100644 => 100755 utils/checkstyle.py
> > > > >  create mode 100755 utils/hooks/pre-commit
> > > > > 
> > > > > diff --git a/utils/checkstyle.py b/utils/checkstyle.py
> > > > > old mode 100644
> > > > > new mode 100755
> > > > > index 7edea25..23eb3f6
> > > > > --- a/utils/checkstyle.py
> > > > > +++ b/utils/checkstyle.py
> > > > > @@ -458,11 +458,16 @@ class StripTrailingSpaceFormatter(Formatter):
> > > > >  # Style checking
> > > > >  #
> > > > >  
> > > > > -def check_file(top_level, commit, filename):
> > > > > +def check_file(top_level, commit, filename, staged):
> > > > >      # Extract the line numbers touched by the commit.
> > > > > -    diff = subprocess.run(['git', 'diff', '%s~..%s' % (commit, commit), '--',
> > > > > -                           '%s/%s' % (top_level, filename)],
> > > > > -                          stdout=subprocess.PIPE).stdout
> > > > 
> > > > I wonder if we could/should automate this by detecting staged changes,
> > > > and then setting staged based on that?
> > > > 
> > > > 	"git status --porcelain | grep ^M"
> > > > 
> > > > Could detect if there are staged changes.
> > > > 
> > > > But equally it might be better to allow the caller to be explicit.
> > > 
> > > I've been wondering about this too, I have a slight preference for
> > > auto-detection, but that's not a hard requirement if too difficult to
> > > implement or impractical to use.
> > 
> > I don't think it hard, but I wasn't sure it was a good idea. One thing
> > that I've been a little slowpy is with the meaning of commit here (of
> > the rev list in fact). Before I added amend support, I was simply
> > ignoring that (no meaning). But then I recycled it to support the
> > commit --amend case, where you want to included the staged changes and
> > the previous commit.
> > 
> > What I found worked to get that information was to only pass one hash,
> > which is the commit before the last commit (HEAD~). Anyway, I think I
> > should fix this, and  so '%s~' % commit instead if I want to maintained
> > the behaviour. But by doing that, there is no longer anything to
> > describe that when I commit --amend, I want the combined report, not
> > just what was staged. And that I also don't want the previous commit to
> > be checked independently from the staged, because that would produce
> > that warning I'm trying to fix.
> > 
> > So my conclusion, is that to disambiguate all this, I should in fact be
> > even more explicit, and also introduce --amend. A better implementation
> > would be to find a way to integrated these special revisions into the
> > revlist, this way (not sure if there is a use case) but you could
> > request a report for specific commits, the staging changes and the
> > amended changes in one call.
> 
> That would be neat indeed. From a quick look at git-rev-parse there's no
> revision syntax to express this that I could find. We could extend the
> revision syntax, but that may be dangerous.

I was thinking to keep the --staged/amend options, but to use mixed
type in the python list, and do type checks when iterating. That would
avoid the binary parameter, which is just bad API really.

> 
> If we instead go for explicit arguments, --staged or --cached would be
> fine with me, but I'd rather use --worktree (or something similar) than
> --amend to reflect that the revision corresponds to the working tree.

I believe worktree is a much worst choice as it matches a completely
unrelated concept in git (see git worktree --help). The concept of an
amendment isn't defined, but can logically match the combination of the
previous commit and the staged changes, since this is what git commit 
--amend will combine to replace the last commit.

> 
> > > > > +    if staged:
> > > > > +        diff = subprocess.run(['git', 'diff', '--staged', commit, '--',
> > > > > +                               '%s/%s' % (top_level, filename)],
> > > > > +                              stdout=subprocess.PIPE).stdout
> > > > > +    else:
> > > > > +        diff = subprocess.run(['git', 'diff', '%s~..%s' % (commit, commit), '--',
> > > > > +                               '%s/%s' % (top_level, filename)],
> > > > > +                              stdout=subprocess.PIPE).stdout
> > > > >      diff = diff.decode('utf-8').splitlines(True)
> > > > >      commit_diff = parse_diff(diff)
> > > > >  
> > > > > @@ -476,8 +481,12 @@ def check_file(top_level, commit, filename):
> > > > >  
> > > > >      # Format the file after the commit with all formatters and compute the diff
> > > > >      # between the unformatted and formatted contents.
> > > > > -    after = subprocess.run(['git', 'show', '%s:%s' % (commit, filename)],
> > > > > -                           stdout=subprocess.PIPE).stdout
> > > > > +    if staged:
> > > > > +        after = subprocess.run(['git', 'show', ':%s' % (filename)],
> > > > > +                               stdout=subprocess.PIPE).stdout
> > > > > +    else:
> > > > > +        after = subprocess.run(['git', 'show', '%s:%s' % (commit, filename)],
> > > > > +                               stdout=subprocess.PIPE).stdout
> > > > >      after = after.decode('utf-8')
> > > > >  
> > > > >      formatted = after
> > > > > @@ -521,13 +530,20 @@ def check_file(top_level, commit, filename):
> > > > >      return len(formatted_diff) + len(issues)
> > > > >  
> > > > >  
> > > > > -def check_style(top_level, commit):
> > > > > -    # Get the commit title and list of files.
> > > > > -    ret = subprocess.run(['git', 'show', '--pretty=oneline','--name-only', commit],
> > > > > -                         stdout=subprocess.PIPE)
> > > > > -    files = ret.stdout.decode('utf-8').splitlines()
> > > > > -    title = files[0]
> > > > > -    files = files[1:]
> > > > > +def check_style(top_level, commit, staged):
> > > > > +    if staged:
> > > > > +        # Get list of staged changed files
> > > > > +        ret = subprocess.run(['git', 'diff', '--staged', '--name-only', commit],
> > > > > +                             stdout=subprocess.PIPE)
> > > > > +        files = ret.stdout.decode('utf-8').splitlines()
> > > > > +        title = "Pre-commit"
> > > > 
> > > > This is not necessarily a "Pre-commit" (hook). It's just staged.
> > > > A user could run the tool directly to validate some staged changes
> > > > before committing...
> > > > 
> > > > So I'd perhaps make this
> > > > 	title = "Staged"
> > > 
> > > Or maybe "Staged changes" to be slightly more explicit ?
> > 
> > It started "Staged", then I splitted it between amend and staged, and
> > for some reason ended up like this. I think I'll split it again, and
> > use "Staged changes" or "Amended changes".
> 
> Maybe "Work tree changes" instead of "Amended changes" ?

See my comment above on why Worktree isn't a great choice. What I maybe
suggest, is to title this one after the previous commit. That would
look like "Amend: <subject line>". The truth though is that nobody
using pre-commit will ever even look at the subject, so we are putting
a little too much effort in naming here.

> 
> > > > > +    else:
> > > > > +        # Get the commit title and list of files.
> > > > > +        ret = subprocess.run(['git', 'show', '--pretty=oneline', '--name-only', commit],
> > > > > +                             stdout=subprocess.PIPE)
> > > > > +        files = ret.stdout.decode('utf-8').splitlines()
> > > > > +        title = files[0]
> > > > > +        files = files[1:]
> > > > >  
> > > > >      separator = '-' * len(title)
> > > > >      print(separator)
> > > > > @@ -541,11 +557,11 @@ def check_style(top_level, commit):
> > > > >      files = [f for f in files if len([p for p in patterns if fnmatch.fnmatch(os.path.basename(f), p)])]
> > > > >      if len(files) == 0:
> > > > >          print("Commit doesn't touch source files, skipping")
> > > > > -        return
> > > > > +        return 0
> > > > >  
> > > > >      issues = 0
> > > > >      for f in files:
> > > > > -        issues += check_file(top_level, commit, f)
> > > > > +        issues += check_file(top_level, commit, f, staged)
> > > > >  
> > > > >      if issues == 0:
> > > > >          print("No style issue detected")
> > > > > @@ -554,6 +570,8 @@ def check_style(top_level, commit):
> > > > >          print("%u potential style %s detected, please review" % \
> > > > >                  (issues, 'issue' if issues == 1 else 'issues'))
> > > > >  
> > > > > +    return issues
> > > > > +
> > > > >  
> > > > >  def extract_revlist(revs):
> > > > >      """Extract a list of commits on which to operate from a revision or revision
> > > > > @@ -595,6 +613,8 @@ def main(argv):
> > > > >      parser = argparse.ArgumentParser()
> > > > >      parser.add_argument('--formatter', '-f', type=str, choices=['astyle', 'clang-format'],
> > > > >                          help='Code formatter. Default to clang-format if not specified.')
> > > > > +    parser.add_argument('--staged', '-s', action='store_true',
> > > > > +                        help='Looks at the staged changes, used for pre-commit, defaults to False')
> > > > >      parser.add_argument('revision_range', type=str, default='HEAD', nargs='?',
> > > > >                          help='Revision range (as defined by git rev-parse). Defaults to HEAD if not specified.')
> > > > >      args = parser.parse_args(argv[1:])
> > > > > @@ -632,11 +652,12 @@ def main(argv):
> > > > >  
> > > > >      revlist = extract_revlist(args.revision_range)
> > > > >  
> > > > > +    issues = 0
> > > > >      for commit in revlist:
> > > > > -        check_style(top_level, commit)
> > > > > +        issues += check_style(top_level, commit, args.staged)
> > > > >          print('')
> > > > >  
> > > > > -    return 0
> > > > > +    return issues
> > > > 
> > > > I'd be tempted to split the
> > > >   "checkstyle: Report issue count as a failure"
> > > > and
> > > >   "checkstyle: Support validating staged git state"
> > > > 
> > > > to two patches, but perhaps that's not necessary unless you see benefit
> > > > in splitting.
> > > 
> > > I don't think there's a need to resubmit just for this, but if a v2 is
> > > needed, it may be nice to split the changes indeed, to explain why
> > > changing the return value is needed.
> > 
> > I think a v2 is needed, so I'll do.
> > 
> > > According to the exit man page,
> > > 
> > > RATIONALE
> > >        As explained in other sections, certain exit status values have been reserved for special uses and should be used by applications only for those purposes:
> > > 
> > >         126    A file to be executed was found, but it was not an executable utility.
> > > 
> > >         127    A utility to be executed was not found.
> > > 
> > >        >128    A command was interrupted by a signal.
> > > 
> > > And according to exit(),
> > > 
> > > The value status & 0377 is returned to the parent process as the process's exit status, and can be collected using one of the wait(2) family of calls.
> > > 
> > > In the unfortunate event that a multiple of 256 issues would be found,
> > > the caller would think everything went fine. Should we thus return 0 on
> > > success and 1 if any issue was found ?
> > 
> > Agreed, will turn the number of issues into 0 and 1.
> > 
> > > > >  if __name__ == '__main__':
> > > > > diff --git a/utils/hooks/pre-commit b/utils/hooks/pre-commit
> > > > > new file mode 100755
> > > > > index 0000000..57d23ef
> > > > > --- /dev/null
> > > > > +++ b/utils/hooks/pre-commit
> > > > 
> > > > Though I really think this deserves it's own commit.
> > > > 
> > > >  "utils/hooks: Provide a pre-commit checkstyle hook"
> > > > 
> > > > > @@ -0,0 +1,16 @@
> > > > > +#!/bin/sh
> > > > > +
> > > > > +# Execute the checkstyle script before committing any code, failing the commit
> > > > > +# if needed. With this workflow, false positive can be ignored using:
> > > > > +#   git commit -n
> > > > > +#
> > > > > +# To utilise this hook, install this file with:
> > > > > +#   cp utils/hooks/pre-commit .git/hooks/pre-commit
> > > > 
> > > > One of the things I had toyed with is installing the hook by setting the
> > > > git hook directory to utils/hooks/ :
> > > > 
> > > >  git config core.hooksPath utils/hooks
> > > > 
> > > > But doing so now would invoke both the pre-commit and the post-commit hook.
> > > > 
> > > > I think it's good to provide the option of how someone might install
> > > > their hook
> > > > 
> > > > > +
> > > > > +commit=
> > > > > +if ps -ocommand= -p $PPID | grep -- "--amend"
> > > > > +then
> > > > > +   commit="HEAD~"
> > > > 
> > > > Is this really needed?
> > > > 
> > > > Edit: Yes, I've just played around with it - and I see what's happening.
> > > > Good catch to find that something extra was required here.
> > > > 
> > > > > +fi
> > > > > +
> > > > > +./utils/checkstyle.py --staged $commit
> > > > 
> > > > I wonder if the checkstyle.py could detect if we had staged changes if
> > > > we could fall back to needing only a single variant of the hook which
> > > > the user could choose to install as either a pre-commit or a post-commit
> > > > hook.
> > > > 
> > > > But perhaps it's more straight-forward to have two hooks to keep it
> > > > easier to support any future differences too.
> > > 
> > > I'm fine either way.
> > 
> > I think having two files in hook/ directory will be less confusing 
> > even if they are identical in the end. But I don't think the auto
> > detection is a good idea, it's makes the CLI of the tool ambiguous at
> > least.
Laurent Pinchart Jan. 17, 2020, 2:55 p.m. UTC | #6
Hi Nicolas,

On Fri, Jan 17, 2020 at 08:50:56AM -0500, Nicolas Dufresne wrote:
> Le vendredi 17 janvier 2020 à 09:46 +0200, Laurent Pinchart a écrit :
> > On Thu, Jan 16, 2020 at 08:23:36PM -0500, Nicolas Dufresne wrote:
> >> Le vendredi 17 janvier 2020 à 00:31 +0200, Laurent Pinchart a écrit :
> >>> On Thu, Jan 16, 2020 at 10:25:31PM +0000, Kieran Bingham wrote:
> >>>> On 16/01/2020 18:26, nicolas@ndufresne.ca wrote:
> >>>>> From: Nicolas Dufresne <nicolas.dufresne@collabora.com>
> >>>>> 
> >>>>> This adds support for pre-commit hook workflow. In pre-commit hook we
> >>>>> check the style on the changes currently staged. Note that this patch
> >>>>> also includes a bit of sugar to support --amend.
> >>>>> 
> >>>>> Signed-off-by: Nicolas Dufresne <nicolas.dufresne@collabora.com>
> >>>>> ---
> >>>>>  utils/checkstyle.py    | 55 +++++++++++++++++++++++++++++-------------
> >>>>>  utils/hooks/pre-commit | 16 ++++++++++++
> >>>>>  2 files changed, 54 insertions(+), 17 deletions(-)
> >>>>>  mode change 100644 => 100755 utils/checkstyle.py
> >>>>>  create mode 100755 utils/hooks/pre-commit
> >>>>> 
> >>>>> diff --git a/utils/checkstyle.py b/utils/checkstyle.py
> >>>>> old mode 100644
> >>>>> new mode 100755
> >>>>> index 7edea25..23eb3f6
> >>>>> --- a/utils/checkstyle.py
> >>>>> +++ b/utils/checkstyle.py
> >>>>> @@ -458,11 +458,16 @@ class StripTrailingSpaceFormatter(Formatter):
> >>>>>  # Style checking
> >>>>>  #
> >>>>>  
> >>>>> -def check_file(top_level, commit, filename):
> >>>>> +def check_file(top_level, commit, filename, staged):
> >>>>>      # Extract the line numbers touched by the commit.
> >>>>> -    diff = subprocess.run(['git', 'diff', '%s~..%s' % (commit, commit), '--',
> >>>>> -                           '%s/%s' % (top_level, filename)],
> >>>>> -                          stdout=subprocess.PIPE).stdout
> >>>> 
> >>>> I wonder if we could/should automate this by detecting staged changes,
> >>>> and then setting staged based on that?
> >>>> 
> >>>> 	"git status --porcelain | grep ^M"
> >>>> 
> >>>> Could detect if there are staged changes.
> >>>> 
> >>>> But equally it might be better to allow the caller to be explicit.
> >>> 
> >>> I've been wondering about this too, I have a slight preference for
> >>> auto-detection, but that's not a hard requirement if too difficult to
> >>> implement or impractical to use.
> >> 
> >> I don't think it hard, but I wasn't sure it was a good idea. One thing
> >> that I've been a little slowpy is with the meaning of commit here (of
> >> the rev list in fact). Before I added amend support, I was simply
> >> ignoring that (no meaning). But then I recycled it to support the
> >> commit --amend case, where you want to included the staged changes and
> >> the previous commit.
> >> 
> >> What I found worked to get that information was to only pass one hash,
> >> which is the commit before the last commit (HEAD~). Anyway, I think I
> >> should fix this, and  so '%s~' % commit instead if I want to maintained
> >> the behaviour. But by doing that, there is no longer anything to
> >> describe that when I commit --amend, I want the combined report, not
> >> just what was staged. And that I also don't want the previous commit to
> >> be checked independently from the staged, because that would produce
> >> that warning I'm trying to fix.
> >> 
> >> So my conclusion, is that to disambiguate all this, I should in fact be
> >> even more explicit, and also introduce --amend. A better implementation
> >> would be to find a way to integrated these special revisions into the
> >> revlist, this way (not sure if there is a use case) but you could
> >> request a report for specific commits, the staging changes and the
> >> amended changes in one call.
> > 
> > That would be neat indeed. From a quick look at git-rev-parse there's no
> > revision syntax to express this that I could find. We could extend the
> > revision syntax, but that may be dangerous.
> 
> I was thinking to keep the --staged/amend options, but to use mixed
> type in the python list, and do type checks when iterating. That would
> avoid the binary parameter, which is just bad API really.
> 
> > If we instead go for explicit arguments, --staged or --cached would be
> > fine with me, but I'd rather use --worktree (or something similar) than
> > --amend to reflect that the revision corresponds to the working tree.
> 
> I believe worktree is a much worst choice as it matches a completely
> unrelated concept in git (see git worktree --help). The concept of an
> amendment isn't defined, but can logically match the combination of the
> previous commit and the staged changes, since this is what git commit 
> --amend will combine to replace the last commit.

"Working tree" had an established meaning before worktree was introduced
:-) That's how git-diff is documented:

"Show changes between the working tree and the index or a tree, changes
between the index and a tree, changes between two trees, changes between
two blob objects, or changes between two files on disk."

--working-tree could work too but is a bit long, although if it's only
used by the pre-commit hook (and/or if we add a short -w option) it
should work.

It would be really nice if git had a reference specifier for the index
in the working tree, we could then do

checkstyle.py HEAD..INDEX
checkstyle.py HEAD..WORK

(or similar). Unless I'm mistaken, the pre-commit hook would then use

checkstyle.py HEAD..INDEX

for the normal git commit case, and

checkstyle.py HEAD~..INDEX

for the --amend case. This would have my preference. And now that I've
written this, --worktree seems a bad name indeed if it's an alias for
HEAD~ on the left-hand side. I proposed it thinking it would be an alias
for <commit>..WORK, but that doesn't seem needed for the pre-commit
hook.

> >>>>> +    if staged:
> >>>>> +        diff = subprocess.run(['git', 'diff', '--staged', commit, '--',
> >>>>> +                               '%s/%s' % (top_level, filename)],
> >>>>> +                              stdout=subprocess.PIPE).stdout
> >>>>> +    else:
> >>>>> +        diff = subprocess.run(['git', 'diff', '%s~..%s' % (commit, commit), '--',
> >>>>> +                               '%s/%s' % (top_level, filename)],
> >>>>> +                              stdout=subprocess.PIPE).stdout
> >>>>>      diff = diff.decode('utf-8').splitlines(True)
> >>>>>      commit_diff = parse_diff(diff)
> >>>>>  
> >>>>> @@ -476,8 +481,12 @@ def check_file(top_level, commit, filename):
> >>>>>  
> >>>>>      # Format the file after the commit with all formatters and compute the diff
> >>>>>      # between the unformatted and formatted contents.
> >>>>> -    after = subprocess.run(['git', 'show', '%s:%s' % (commit, filename)],
> >>>>> -                           stdout=subprocess.PIPE).stdout
> >>>>> +    if staged:
> >>>>> +        after = subprocess.run(['git', 'show', ':%s' % (filename)],
> >>>>> +                               stdout=subprocess.PIPE).stdout
> >>>>> +    else:
> >>>>> +        after = subprocess.run(['git', 'show', '%s:%s' % (commit, filename)],
> >>>>> +                               stdout=subprocess.PIPE).stdout
> >>>>>      after = after.decode('utf-8')
> >>>>>  
> >>>>>      formatted = after
> >>>>> @@ -521,13 +530,20 @@ def check_file(top_level, commit, filename):
> >>>>>      return len(formatted_diff) + len(issues)
> >>>>>  
> >>>>>  
> >>>>> -def check_style(top_level, commit):
> >>>>> -    # Get the commit title and list of files.
> >>>>> -    ret = subprocess.run(['git', 'show', '--pretty=oneline','--name-only', commit],
> >>>>> -                         stdout=subprocess.PIPE)
> >>>>> -    files = ret.stdout.decode('utf-8').splitlines()
> >>>>> -    title = files[0]
> >>>>> -    files = files[1:]
> >>>>> +def check_style(top_level, commit, staged):
> >>>>> +    if staged:
> >>>>> +        # Get list of staged changed files
> >>>>> +        ret = subprocess.run(['git', 'diff', '--staged', '--name-only', commit],
> >>>>> +                             stdout=subprocess.PIPE)
> >>>>> +        files = ret.stdout.decode('utf-8').splitlines()
> >>>>> +        title = "Pre-commit"
> >>>> 
> >>>> This is not necessarily a "Pre-commit" (hook). It's just staged.
> >>>> A user could run the tool directly to validate some staged changes
> >>>> before committing...
> >>>> 
> >>>> So I'd perhaps make this
> >>>> 	title = "Staged"
> >>> 
> >>> Or maybe "Staged changes" to be slightly more explicit ?
> >> 
> >> It started "Staged", then I splitted it between amend and staged, and
> >> for some reason ended up like this. I think I'll split it again, and
> >> use "Staged changes" or "Amended changes".
> > 
> > Maybe "Work tree changes" instead of "Amended changes" ?
> 
> See my comment above on why Worktree isn't a great choice. What I maybe
> suggest, is to title this one after the previous commit. That would
> look like "Amend: <subject line>". The truth though is that nobody
> using pre-commit will ever even look at the subject, so we are putting
> a little too much effort in naming here.
> 
> >>>>> +    else:
> >>>>> +        # Get the commit title and list of files.
> >>>>> +        ret = subprocess.run(['git', 'show', '--pretty=oneline', '--name-only', commit],
> >>>>> +                             stdout=subprocess.PIPE)
> >>>>> +        files = ret.stdout.decode('utf-8').splitlines()
> >>>>> +        title = files[0]
> >>>>> +        files = files[1:]
> >>>>>  
> >>>>>      separator = '-' * len(title)
> >>>>>      print(separator)
> >>>>> @@ -541,11 +557,11 @@ def check_style(top_level, commit):
> >>>>>      files = [f for f in files if len([p for p in patterns if fnmatch.fnmatch(os.path.basename(f), p)])]
> >>>>>      if len(files) == 0:
> >>>>>          print("Commit doesn't touch source files, skipping")
> >>>>> -        return
> >>>>> +        return 0
> >>>>>  
> >>>>>      issues = 0
> >>>>>      for f in files:
> >>>>> -        issues += check_file(top_level, commit, f)
> >>>>> +        issues += check_file(top_level, commit, f, staged)
> >>>>>  
> >>>>>      if issues == 0:
> >>>>>          print("No style issue detected")
> >>>>> @@ -554,6 +570,8 @@ def check_style(top_level, commit):
> >>>>>          print("%u potential style %s detected, please review" % \
> >>>>>                  (issues, 'issue' if issues == 1 else 'issues'))
> >>>>>  
> >>>>> +    return issues
> >>>>> +
> >>>>>  
> >>>>>  def extract_revlist(revs):
> >>>>>      """Extract a list of commits on which to operate from a revision or revision
> >>>>> @@ -595,6 +613,8 @@ def main(argv):
> >>>>>      parser = argparse.ArgumentParser()
> >>>>>      parser.add_argument('--formatter', '-f', type=str, choices=['astyle', 'clang-format'],
> >>>>>                          help='Code formatter. Default to clang-format if not specified.')
> >>>>> +    parser.add_argument('--staged', '-s', action='store_true',
> >>>>> +                        help='Looks at the staged changes, used for pre-commit, defaults to False')
> >>>>>      parser.add_argument('revision_range', type=str, default='HEAD', nargs='?',
> >>>>>                          help='Revision range (as defined by git rev-parse). Defaults to HEAD if not specified.')
> >>>>>      args = parser.parse_args(argv[1:])
> >>>>> @@ -632,11 +652,12 @@ def main(argv):
> >>>>>  
> >>>>>      revlist = extract_revlist(args.revision_range)
> >>>>>  
> >>>>> +    issues = 0
> >>>>>      for commit in revlist:
> >>>>> -        check_style(top_level, commit)
> >>>>> +        issues += check_style(top_level, commit, args.staged)
> >>>>>          print('')
> >>>>>  
> >>>>> -    return 0
> >>>>> +    return issues
> >>>> 
> >>>> I'd be tempted to split the
> >>>>   "checkstyle: Report issue count as a failure"
> >>>> and
> >>>>   "checkstyle: Support validating staged git state"
> >>>> 
> >>>> to two patches, but perhaps that's not necessary unless you see benefit
> >>>> in splitting.
> >>> 
> >>> I don't think there's a need to resubmit just for this, but if a v2 is
> >>> needed, it may be nice to split the changes indeed, to explain why
> >>> changing the return value is needed.
> >> 
> >> I think a v2 is needed, so I'll do.
> >> 
> >>> According to the exit man page,
> >>> 
> >>> RATIONALE
> >>>        As explained in other sections, certain exit status values have been reserved for special uses and should be used by applications only for those purposes:
> >>> 
> >>>         126    A file to be executed was found, but it was not an executable utility.
> >>> 
> >>>         127    A utility to be executed was not found.
> >>> 
> >>>        >128    A command was interrupted by a signal.
> >>> 
> >>> And according to exit(),
> >>> 
> >>> The value status & 0377 is returned to the parent process as the process's exit status, and can be collected using one of the wait(2) family of calls.
> >>> 
> >>> In the unfortunate event that a multiple of 256 issues would be found,
> >>> the caller would think everything went fine. Should we thus return 0 on
> >>> success and 1 if any issue was found ?
> >> 
> >> Agreed, will turn the number of issues into 0 and 1.
> >> 
> >>>>>  if __name__ == '__main__':
> >>>>> diff --git a/utils/hooks/pre-commit b/utils/hooks/pre-commit
> >>>>> new file mode 100755
> >>>>> index 0000000..57d23ef
> >>>>> --- /dev/null
> >>>>> +++ b/utils/hooks/pre-commit
> >>>> 
> >>>> Though I really think this deserves it's own commit.
> >>>> 
> >>>>  "utils/hooks: Provide a pre-commit checkstyle hook"
> >>>> 
> >>>>> @@ -0,0 +1,16 @@
> >>>>> +#!/bin/sh
> >>>>> +
> >>>>> +# Execute the checkstyle script before committing any code, failing the commit
> >>>>> +# if needed. With this workflow, false positive can be ignored using:
> >>>>> +#   git commit -n
> >>>>> +#
> >>>>> +# To utilise this hook, install this file with:
> >>>>> +#   cp utils/hooks/pre-commit .git/hooks/pre-commit
> >>>> 
> >>>> One of the things I had toyed with is installing the hook by setting the
> >>>> git hook directory to utils/hooks/ :
> >>>> 
> >>>>  git config core.hooksPath utils/hooks
> >>>> 
> >>>> But doing so now would invoke both the pre-commit and the post-commit hook.
> >>>> 
> >>>> I think it's good to provide the option of how someone might install
> >>>> their hook
> >>>> 
> >>>>> +
> >>>>> +commit=
> >>>>> +if ps -ocommand= -p $PPID | grep -- "--amend"
> >>>>> +then
> >>>>> +   commit="HEAD~"
> >>>> 
> >>>> Is this really needed?
> >>>> 
> >>>> Edit: Yes, I've just played around with it - and I see what's happening.
> >>>> Good catch to find that something extra was required here.
> >>>> 
> >>>>> +fi
> >>>>> +
> >>>>> +./utils/checkstyle.py --staged $commit
> >>>> 
> >>>> I wonder if the checkstyle.py could detect if we had staged changes if
> >>>> we could fall back to needing only a single variant of the hook which
> >>>> the user could choose to install as either a pre-commit or a post-commit
> >>>> hook.
> >>>> 
> >>>> But perhaps it's more straight-forward to have two hooks to keep it
> >>>> easier to support any future differences too.
> >>> 
> >>> I'm fine either way.
> >> 
> >> I think having two files in hook/ directory will be less confusing 
> >> even if they are identical in the end. But I don't think the auto
> >> detection is a good idea, it's makes the CLI of the tool ambiguous at
> >> least.
Nicolas Dufresne Jan. 17, 2020, 3:55 p.m. UTC | #7
Le vendredi 17 janvier 2020 à 16:55 +0200, Laurent Pinchart a écrit :
> Hi Nicolas,
> 
> On Fri, Jan 17, 2020 at 08:50:56AM -0500, Nicolas Dufresne wrote:
> > Le vendredi 17 janvier 2020 à 09:46 +0200, Laurent Pinchart a écrit :
> > > On Thu, Jan 16, 2020 at 08:23:36PM -0500, Nicolas Dufresne wrote:
> > > > Le vendredi 17 janvier 2020 à 00:31 +0200, Laurent Pinchart a écrit :
> > > > > On Thu, Jan 16, 2020 at 10:25:31PM +0000, Kieran Bingham wrote:
> > > > > > On 16/01/2020 18:26, nicolas@ndufresne.ca wrote:
> > > > > > > From: Nicolas Dufresne <nicolas.dufresne@collabora.com>
> > > > > > > 
> > > > > > > This adds support for pre-commit hook workflow. In pre-commit hook we
> > > > > > > check the style on the changes currently staged. Note that this patch
> > > > > > > also includes a bit of sugar to support --amend.
> > > > > > > 
> > > > > > > Signed-off-by: Nicolas Dufresne <nicolas.dufresne@collabora.com>
> > > > > > > ---
> > > > > > >  utils/checkstyle.py    | 55 +++++++++++++++++++++++++++++-------------
> > > > > > >  utils/hooks/pre-commit | 16 ++++++++++++
> > > > > > >  2 files changed, 54 insertions(+), 17 deletions(-)
> > > > > > >  mode change 100644 => 100755 utils/checkstyle.py
> > > > > > >  create mode 100755 utils/hooks/pre-commit
> > > > > > > 
> > > > > > > diff --git a/utils/checkstyle.py b/utils/checkstyle.py
> > > > > > > old mode 100644
> > > > > > > new mode 100755
> > > > > > > index 7edea25..23eb3f6
> > > > > > > --- a/utils/checkstyle.py
> > > > > > > +++ b/utils/checkstyle.py
> > > > > > > @@ -458,11 +458,16 @@ class StripTrailingSpaceFormatter(Formatter):
> > > > > > >  # Style checking
> > > > > > >  #
> > > > > > >  
> > > > > > > -def check_file(top_level, commit, filename):
> > > > > > > +def check_file(top_level, commit, filename, staged):
> > > > > > >      # Extract the line numbers touched by the commit.
> > > > > > > -    diff = subprocess.run(['git', 'diff', '%s~..%s' % (commit, commit), '--',
> > > > > > > -                           '%s/%s' % (top_level, filename)],
> > > > > > > -                          stdout=subprocess.PIPE).stdout
> > > > > > 
> > > > > > I wonder if we could/should automate this by detecting staged changes,
> > > > > > and then setting staged based on that?
> > > > > > 
> > > > > > 	"git status --porcelain | grep ^M"
> > > > > > 
> > > > > > Could detect if there are staged changes.
> > > > > > 
> > > > > > But equally it might be better to allow the caller to be explicit.
> > > > > 
> > > > > I've been wondering about this too, I have a slight preference for
> > > > > auto-detection, but that's not a hard requirement if too difficult to
> > > > > implement or impractical to use.
> > > > 
> > > > I don't think it hard, but I wasn't sure it was a good idea. One thing
> > > > that I've been a little slowpy is with the meaning of commit here (of
> > > > the rev list in fact). Before I added amend support, I was simply
> > > > ignoring that (no meaning). But then I recycled it to support the
> > > > commit --amend case, where you want to included the staged changes and
> > > > the previous commit.
> > > > 
> > > > What I found worked to get that information was to only pass one hash,
> > > > which is the commit before the last commit (HEAD~). Anyway, I think I
> > > > should fix this, and  so '%s~' % commit instead if I want to maintained
> > > > the behaviour. But by doing that, there is no longer anything to
> > > > describe that when I commit --amend, I want the combined report, not
> > > > just what was staged. And that I also don't want the previous commit to
> > > > be checked independently from the staged, because that would produce
> > > > that warning I'm trying to fix.
> > > > 
> > > > So my conclusion, is that to disambiguate all this, I should in fact be
> > > > even more explicit, and also introduce --amend. A better implementation
> > > > would be to find a way to integrated these special revisions into the
> > > > revlist, this way (not sure if there is a use case) but you could
> > > > request a report for specific commits, the staging changes and the
> > > > amended changes in one call.
> > > 
> > > That would be neat indeed. From a quick look at git-rev-parse there's no
> > > revision syntax to express this that I could find. We could extend the
> > > revision syntax, but that may be dangerous.
> > 
> > I was thinking to keep the --staged/amend options, but to use mixed
> > type in the python list, and do type checks when iterating. That would
> > avoid the binary parameter, which is just bad API really.
> > 
> > > If we instead go for explicit arguments, --staged or --cached would be
> > > fine with me, but I'd rather use --worktree (or something similar) than
> > > --amend to reflect that the revision corresponds to the working tree.
> > 
> > I believe worktree is a much worst choice as it matches a completely
> > unrelated concept in git (see git worktree --help). The concept of an
> > amendment isn't defined, but can logically match the combination of the
> > previous commit and the staged changes, since this is what git commit 
> > --amend will combine to replace the last commit.
> 
> "Working tree" had an established meaning before worktree was introduced
> :-) That's how git-diff is documented:
> 
> "Show changes between the working tree and the index or a tree, changes
> between the index and a tree, changes between two trees, changes between
> two blob objects, or changes between two files on disk."
> 
> --working-tree could work too but is a bit long, although if it's only
> used by the pre-commit hook (and/or if we add a short -w option) it
> should work.

I believe you didn't interpret this correctly. The working tree is the
code as seen on your file system, the index is the changes that are to
be commited. What I check is the index (pre-commit changes), not he
working tree.

Right now, I'm just paying for git's mistake. What I want to deal with
is the index, which in git-diff is shown through --cached or --staged
command line option for unknown reasons. Looking at the changes in the
working tree to totally irrelevant to a pre-commit hook, but could be
an interesting feature to add if you feel it's useful to you.

But here, my goal is to add support for pre-commit hook, which is a
check run before you fill in the message. So I need to pick what git is
about to commit. In the case of commit, that's just the index, and for
commit --amend, it's the combination of the last commit and the index.

> 
> It would be really nice if git had a reference specifier for the index
> in the working tree, we could then do
> 
> checkstyle.py HEAD..INDEX
> checkstyle.py HEAD..WORK
> 
> (or similar). Unless I'm mistaken, the pre-commit hook would then use
> 
> checkstyle.py HEAD..INDEX
> 
> for the normal git commit case, and
> 
> checkstyle.py HEAD~..INDEX
> 
> for the --amend case. This would have my preference. And now that I've
> written this, --worktree seems a bad name indeed if it's an alias for
> HEAD~ on the left-hand side. I proposed it thinking it would be an alias
> for <commit>..WORK, but that doesn't seem needed for the pre-commit
> hook.
> 
> > > > > > > +    if staged:
> > > > > > > +        diff = subprocess.run(['git', 'diff', '--staged', commit, '--',
> > > > > > > +                               '%s/%s' % (top_level, filename)],
> > > > > > > +                              stdout=subprocess.PIPE).stdout
> > > > > > > +    else:
> > > > > > > +        diff = subprocess.run(['git', 'diff', '%s~..%s' % (commit, commit), '--',
> > > > > > > +                               '%s/%s' % (top_level, filename)],
> > > > > > > +                              stdout=subprocess.PIPE).stdout
> > > > > > >      diff = diff.decode('utf-8').splitlines(True)
> > > > > > >      commit_diff = parse_diff(diff)
> > > > > > >  
> > > > > > > @@ -476,8 +481,12 @@ def check_file(top_level, commit, filename):
> > > > > > >  
> > > > > > >      # Format the file after the commit with all formatters and compute the diff
> > > > > > >      # between the unformatted and formatted contents.
> > > > > > > -    after = subprocess.run(['git', 'show', '%s:%s' % (commit, filename)],
> > > > > > > -                           stdout=subprocess.PIPE).stdout
> > > > > > > +    if staged:
> > > > > > > +        after = subprocess.run(['git', 'show', ':%s' % (filename)],
> > > > > > > +                               stdout=subprocess.PIPE).stdout
> > > > > > > +    else:
> > > > > > > +        after = subprocess.run(['git', 'show', '%s:%s' % (commit, filename)],
> > > > > > > +                               stdout=subprocess.PIPE).stdout
> > > > > > >      after = after.decode('utf-8')
> > > > > > >  
> > > > > > >      formatted = after
> > > > > > > @@ -521,13 +530,20 @@ def check_file(top_level, commit, filename):
> > > > > > >      return len(formatted_diff) + len(issues)
> > > > > > >  
> > > > > > >  
> > > > > > > -def check_style(top_level, commit):
> > > > > > > -    # Get the commit title and list of files.
> > > > > > > -    ret = subprocess.run(['git', 'show', '--pretty=oneline','--name-only', commit],
> > > > > > > -                         stdout=subprocess.PIPE)
> > > > > > > -    files = ret.stdout.decode('utf-8').splitlines()
> > > > > > > -    title = files[0]
> > > > > > > -    files = files[1:]
> > > > > > > +def check_style(top_level, commit, staged):
> > > > > > > +    if staged:
> > > > > > > +        # Get list of staged changed files
> > > > > > > +        ret = subprocess.run(['git', 'diff', '--staged', '--name-only', commit],
> > > > > > > +                             stdout=subprocess.PIPE)
> > > > > > > +        files = ret.stdout.decode('utf-8').splitlines()
> > > > > > > +        title = "Pre-commit"
> > > > > > 
> > > > > > This is not necessarily a "Pre-commit" (hook). It's just staged.
> > > > > > A user could run the tool directly to validate some staged changes
> > > > > > before committing...
> > > > > > 
> > > > > > So I'd perhaps make this
> > > > > > 	title = "Staged"
> > > > > 
> > > > > Or maybe "Staged changes" to be slightly more explicit ?
> > > > 
> > > > It started "Staged", then I splitted it between amend and staged, and
> > > > for some reason ended up like this. I think I'll split it again, and
> > > > use "Staged changes" or "Amended changes".
> > > 
> > > Maybe "Work tree changes" instead of "Amended changes" ?
> > 
> > See my comment above on why Worktree isn't a great choice. What I maybe
> > suggest, is to title this one after the previous commit. That would
> > look like "Amend: <subject line>". The truth though is that nobody
> > using pre-commit will ever even look at the subject, so we are putting
> > a little too much effort in naming here.
> > 
> > > > > > > +    else:
> > > > > > > +        # Get the commit title and list of files.
> > > > > > > +        ret = subprocess.run(['git', 'show', '--pretty=oneline', '--name-only', commit],
> > > > > > > +                             stdout=subprocess.PIPE)
> > > > > > > +        files = ret.stdout.decode('utf-8').splitlines()
> > > > > > > +        title = files[0]
> > > > > > > +        files = files[1:]
> > > > > > >  
> > > > > > >      separator = '-' * len(title)
> > > > > > >      print(separator)
> > > > > > > @@ -541,11 +557,11 @@ def check_style(top_level, commit):
> > > > > > >      files = [f for f in files if len([p for p in patterns if fnmatch.fnmatch(os.path.basename(f), p)])]
> > > > > > >      if len(files) == 0:
> > > > > > >          print("Commit doesn't touch source files, skipping")
> > > > > > > -        return
> > > > > > > +        return 0
> > > > > > >  
> > > > > > >      issues = 0
> > > > > > >      for f in files:
> > > > > > > -        issues += check_file(top_level, commit, f)
> > > > > > > +        issues += check_file(top_level, commit, f, staged)
> > > > > > >  
> > > > > > >      if issues == 0:
> > > > > > >          print("No style issue detected")
> > > > > > > @@ -554,6 +570,8 @@ def check_style(top_level, commit):
> > > > > > >          print("%u potential style %s detected, please review" % \
> > > > > > >                  (issues, 'issue' if issues == 1 else 'issues'))
> > > > > > >  
> > > > > > > +    return issues
> > > > > > > +
> > > > > > >  
> > > > > > >  def extract_revlist(revs):
> > > > > > >      """Extract a list of commits on which to operate from a revision or revision
> > > > > > > @@ -595,6 +613,8 @@ def main(argv):
> > > > > > >      parser = argparse.ArgumentParser()
> > > > > > >      parser.add_argument('--formatter', '-f', type=str, choices=['astyle', 'clang-format'],
> > > > > > >                          help='Code formatter. Default to clang-format if not specified.')
> > > > > > > +    parser.add_argument('--staged', '-s', action='store_true',
> > > > > > > +                        help='Looks at the staged changes, used for pre-commit, defaults to False')
> > > > > > >      parser.add_argument('revision_range', type=str, default='HEAD', nargs='?',
> > > > > > >                          help='Revision range (as defined by git rev-parse). Defaults to HEAD if not specified.')
> > > > > > >      args = parser.parse_args(argv[1:])
> > > > > > > @@ -632,11 +652,12 @@ def main(argv):
> > > > > > >  
> > > > > > >      revlist = extract_revlist(args.revision_range)
> > > > > > >  
> > > > > > > +    issues = 0
> > > > > > >      for commit in revlist:
> > > > > > > -        check_style(top_level, commit)
> > > > > > > +        issues += check_style(top_level, commit, args.staged)
> > > > > > >          print('')
> > > > > > >  
> > > > > > > -    return 0
> > > > > > > +    return issues
> > > > > > 
> > > > > > I'd be tempted to split the
> > > > > >   "checkstyle: Report issue count as a failure"
> > > > > > and
> > > > > >   "checkstyle: Support validating staged git state"
> > > > > > 
> > > > > > to two patches, but perhaps that's not necessary unless you see benefit
> > > > > > in splitting.
> > > > > 
> > > > > I don't think there's a need to resubmit just for this, but if a v2 is
> > > > > needed, it may be nice to split the changes indeed, to explain why
> > > > > changing the return value is needed.
> > > > 
> > > > I think a v2 is needed, so I'll do.
> > > > 
> > > > > According to the exit man page,
> > > > > 
> > > > > RATIONALE
> > > > >        As explained in other sections, certain exit status values have been reserved for special uses and should be used by applications only for those purposes:
> > > > > 
> > > > >         126    A file to be executed was found, but it was not an executable utility.
> > > > > 
> > > > >         127    A utility to be executed was not found.
> > > > > 
> > > > >        >128    A command was interrupted by a signal.
> > > > > 
> > > > > And according to exit(),
> > > > > 
> > > > > The value status & 0377 is returned to the parent process as the process's exit status, and can be collected using one of the wait(2) family of calls.
> > > > > 
> > > > > In the unfortunate event that a multiple of 256 issues would be found,
> > > > > the caller would think everything went fine. Should we thus return 0 on
> > > > > success and 1 if any issue was found ?
> > > > 
> > > > Agreed, will turn the number of issues into 0 and 1.
> > > > 
> > > > > > >  if __name__ == '__main__':
> > > > > > > diff --git a/utils/hooks/pre-commit b/utils/hooks/pre-commit
> > > > > > > new file mode 100755
> > > > > > > index 0000000..57d23ef
> > > > > > > --- /dev/null
> > > > > > > +++ b/utils/hooks/pre-commit
> > > > > > 
> > > > > > Though I really think this deserves it's own commit.
> > > > > > 
> > > > > >  "utils/hooks: Provide a pre-commit checkstyle hook"
> > > > > > 
> > > > > > > @@ -0,0 +1,16 @@
> > > > > > > +#!/bin/sh
> > > > > > > +
> > > > > > > +# Execute the checkstyle script before committing any code, failing the commit
> > > > > > > +# if needed. With this workflow, false positive can be ignored using:
> > > > > > > +#   git commit -n
> > > > > > > +#
> > > > > > > +# To utilise this hook, install this file with:
> > > > > > > +#   cp utils/hooks/pre-commit .git/hooks/pre-commit
> > > > > > 
> > > > > > One of the things I had toyed with is installing the hook by setting the
> > > > > > git hook directory to utils/hooks/ :
> > > > > > 
> > > > > >  git config core.hooksPath utils/hooks
> > > > > > 
> > > > > > But doing so now would invoke both the pre-commit and the post-commit hook.
> > > > > > 
> > > > > > I think it's good to provide the option of how someone might install
> > > > > > their hook
> > > > > > 
> > > > > > > +
> > > > > > > +commit=
> > > > > > > +if ps -ocommand= -p $PPID | grep -- "--amend"
> > > > > > > +then
> > > > > > > +   commit="HEAD~"
> > > > > > 
> > > > > > Is this really needed?
> > > > > > 
> > > > > > Edit: Yes, I've just played around with it - and I see what's happening.
> > > > > > Good catch to find that something extra was required here.
> > > > > > 
> > > > > > > +fi
> > > > > > > +
> > > > > > > +./utils/checkstyle.py --staged $commit
> > > > > > 
> > > > > > I wonder if the checkstyle.py could detect if we had staged changes if
> > > > > > we could fall back to needing only a single variant of the hook which
> > > > > > the user could choose to install as either a pre-commit or a post-commit
> > > > > > hook.
> > > > > > 
> > > > > > But perhaps it's more straight-forward to have two hooks to keep it
> > > > > > easier to support any future differences too.
> > > > > 
> > > > > I'm fine either way.
> > > > 
> > > > I think having two files in hook/ directory will be less confusing 
> > > > even if they are identical in the end. But I don't think the auto
> > > > detection is a good idea, it's makes the CLI of the tool ambiguous at
> > > > least.

Patch

diff --git a/utils/checkstyle.py b/utils/checkstyle.py
old mode 100644
new mode 100755
index 7edea25..23eb3f6
--- a/utils/checkstyle.py
+++ b/utils/checkstyle.py
@@ -458,11 +458,16 @@  class StripTrailingSpaceFormatter(Formatter):
 # Style checking
 #
 
-def check_file(top_level, commit, filename):
+def check_file(top_level, commit, filename, staged):
     # Extract the line numbers touched by the commit.
-    diff = subprocess.run(['git', 'diff', '%s~..%s' % (commit, commit), '--',
-                           '%s/%s' % (top_level, filename)],
-                          stdout=subprocess.PIPE).stdout
+    if staged:
+        diff = subprocess.run(['git', 'diff', '--staged', commit, '--',
+                               '%s/%s' % (top_level, filename)],
+                              stdout=subprocess.PIPE).stdout
+    else:
+        diff = subprocess.run(['git', 'diff', '%s~..%s' % (commit, commit), '--',
+                               '%s/%s' % (top_level, filename)],
+                              stdout=subprocess.PIPE).stdout
     diff = diff.decode('utf-8').splitlines(True)
     commit_diff = parse_diff(diff)
 
@@ -476,8 +481,12 @@  def check_file(top_level, commit, filename):
 
     # Format the file after the commit with all formatters and compute the diff
     # between the unformatted and formatted contents.
-    after = subprocess.run(['git', 'show', '%s:%s' % (commit, filename)],
-                           stdout=subprocess.PIPE).stdout
+    if staged:
+        after = subprocess.run(['git', 'show', ':%s' % (filename)],
+                               stdout=subprocess.PIPE).stdout
+    else:
+        after = subprocess.run(['git', 'show', '%s:%s' % (commit, filename)],
+                               stdout=subprocess.PIPE).stdout
     after = after.decode('utf-8')
 
     formatted = after
@@ -521,13 +530,20 @@  def check_file(top_level, commit, filename):
     return len(formatted_diff) + len(issues)
 
 
-def check_style(top_level, commit):
-    # Get the commit title and list of files.
-    ret = subprocess.run(['git', 'show', '--pretty=oneline','--name-only', commit],
-                         stdout=subprocess.PIPE)
-    files = ret.stdout.decode('utf-8').splitlines()
-    title = files[0]
-    files = files[1:]
+def check_style(top_level, commit, staged):
+    if staged:
+        # Get list of staged changed files
+        ret = subprocess.run(['git', 'diff', '--staged', '--name-only', commit],
+                             stdout=subprocess.PIPE)
+        files = ret.stdout.decode('utf-8').splitlines()
+        title = "Pre-commit"
+    else:
+        # Get the commit title and list of files.
+        ret = subprocess.run(['git', 'show', '--pretty=oneline', '--name-only', commit],
+                             stdout=subprocess.PIPE)
+        files = ret.stdout.decode('utf-8').splitlines()
+        title = files[0]
+        files = files[1:]
 
     separator = '-' * len(title)
     print(separator)
@@ -541,11 +557,11 @@  def check_style(top_level, commit):
     files = [f for f in files if len([p for p in patterns if fnmatch.fnmatch(os.path.basename(f), p)])]
     if len(files) == 0:
         print("Commit doesn't touch source files, skipping")
-        return
+        return 0
 
     issues = 0
     for f in files:
-        issues += check_file(top_level, commit, f)
+        issues += check_file(top_level, commit, f, staged)
 
     if issues == 0:
         print("No style issue detected")
@@ -554,6 +570,8 @@  def check_style(top_level, commit):
         print("%u potential style %s detected, please review" % \
                 (issues, 'issue' if issues == 1 else 'issues'))
 
+    return issues
+
 
 def extract_revlist(revs):
     """Extract a list of commits on which to operate from a revision or revision
@@ -595,6 +613,8 @@  def main(argv):
     parser = argparse.ArgumentParser()
     parser.add_argument('--formatter', '-f', type=str, choices=['astyle', 'clang-format'],
                         help='Code formatter. Default to clang-format if not specified.')
+    parser.add_argument('--staged', '-s', action='store_true',
+                        help='Looks at the staged changes, used for pre-commit, defaults to False')
     parser.add_argument('revision_range', type=str, default='HEAD', nargs='?',
                         help='Revision range (as defined by git rev-parse). Defaults to HEAD if not specified.')
     args = parser.parse_args(argv[1:])
@@ -632,11 +652,12 @@  def main(argv):
 
     revlist = extract_revlist(args.revision_range)
 
+    issues = 0
     for commit in revlist:
-        check_style(top_level, commit)
+        issues += check_style(top_level, commit, args.staged)
         print('')
 
-    return 0
+    return issues
 
 
 if __name__ == '__main__':
diff --git a/utils/hooks/pre-commit b/utils/hooks/pre-commit
new file mode 100755
index 0000000..57d23ef
--- /dev/null
+++ b/utils/hooks/pre-commit
@@ -0,0 +1,16 @@ 
+#!/bin/sh
+
+# Execute the checkstyle script before committing any code, failing the commit
+# if needed. With this workflow, false positive can be ignored using:
+#   git commit -n
+#
+# To utilise this hook, install this file with:
+#   cp utils/hooks/pre-commit .git/hooks/pre-commit
+
+commit=
+if ps -ocommand= -p $PPID | grep -- "--amend"
+then
+   commit="HEAD~"
+fi
+
+./utils/checkstyle.py --staged $commit