From 03372dbbe618bfcd02f9c8cdbfe78e97e3aad43b Mon Sep 17 00:00:00 2001 From: Joe Perches Date: Fri, 5 Mar 2010 13:43:00 -0800 Subject: scripts/get_maintainer.pl: add --file-emails, find embedded email addresses Add an imperfect option to search a source file for email addresses. New option: --file-emails or --fe. Email addresses in files are freeform text and are nearly impossible to parse. Still, might as well try to do a somewhat acceptable job of finding them. This code should find all addresses that are in the form addr@domain.tld. The code assumes that up to 3 alphabetic words, along with dashes, commas, and periods, that precede the email address are a name. If 3 words are found for the name, and one of the first two words is a single letter and period, or just a single letter, then the 3 words are used as the name; otherwise the last 2 words are used. Some variants that are shown correctly: John Smith Random J. Developer Random J. Developer (rjd@tld.com) J. Random Developer rjd@tld.com Variants that are shown nominally correctly: Written by First Last (funny-addr@somecompany.com) is shown as: First Last Variants that are shown incorrectly: Some Really Long Name MontaVista Software, Inc. are returned as: Long Name "Software, Inc" --roles and --rolestats show "(in file)" for matches. For instance: Without -file-emails: $ ./scripts/get_maintainer.pl -f -nogit -roles net/core/netpoll.c David S. Miller (maintainer:NETWORKING [GENERAL]) linux-kernel@vger.kernel.org (open list) With -fe: $ ./scripts/get_maintainer.pl -f -fe -nogit -roles net/core/netpoll.c David S. Miller (maintainer:NETWORKING [GENERAL]) Matt Mackall (in file) Ingo Molnar (in file) linux-kernel@vger.kernel.org (open list) netdev@vger.kernel.org (open list:NETWORKING [GENERAL]) The number of email addresses in the file is not limited. Neither is the number of returned email addresses. 
Signed-off-by: Joe Perches Cc: Matt Mackall Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- scripts/get_maintainer.pl | 83 +++++++++++++++++++++++++++++++++++++++++++---- 1 file changed, 76 insertions(+), 7 deletions(-) (limited to 'scripts/get_maintainer.pl') diff --git a/scripts/get_maintainer.pl b/scripts/get_maintainer.pl index 2f3230db7ff..bff2390652c 100755 --- a/scripts/get_maintainer.pl +++ b/scripts/get_maintainer.pl @@ -41,6 +41,7 @@ my $web = 0; my $subsystem = 0; my $status = 0; my $keywords = 1; +my $file_emails = 0; my $from_filename = 0; my $pattern_depth = 0; my $version = 0; @@ -120,6 +121,7 @@ if (!GetOptions( 'web!' => \$web, 'pattern-depth=i' => \$pattern_depth, 'k|keywords!' => \$keywords, + 'fe|file-emails!' => \$file_emails, 'f|file' => \$from_filename, 'v|version' => \$version, 'h|help' => \$help, @@ -232,6 +234,7 @@ if ($email_remove_duplicates) { my @files = (); my @range = (); my @keyword_tvi = (); +my @file_emails = (); foreach my $file (@ARGV) { ##if $file is a directory and it lacks a trailing slash, add one @@ -242,15 +245,21 @@ foreach my $file (@ARGV) { } if ($from_filename) { push(@files, $file); - if (-f $file && $keywords) { + if (-f $file && ($keywords || $file_emails)) { open(FILE, "<$file") or die "$P: Can't open ${file}\n"; my $text = do { local($/) ; }; - foreach my $line (keys %keyword_hash) { - if ($text =~ m/$keyword_hash{$line}/x) { - push(@keyword_tvi, $line); + close(FILE); + if ($keywords) { + foreach my $line (keys %keyword_hash) { + if ($text =~ m/$keyword_hash{$line}/x) { + push(@keyword_tvi, $line); + } } } - close(FILE); + if ($file_emails) { + my @poss_addr = $text =~ m$[A-Za-zÀ-ÿ\"\' \,\.\+-]*\s*[\,]*\s*[\(\<\{]{0,1}[A-Za-z0-9_\.\+-]+\@[A-Za-z0-9\.-]+\.[A-Za-z0-9]+[\)\>\}]{0,1}$g; + push(@file_emails, clean_file_emails(@poss_addr)); + } } } else { my $file_cnt = @files; @@ -285,6 +294,8 @@ foreach my $file (@ARGV) { } } +@file_emails = uniq(@file_emails); + my @email_to = (); my @list_to = (); my 
@scm = (); @@ -377,6 +388,14 @@ if ($email) { } } } + + foreach my $email (@file_emails) { + my ($name, $address) = parse_email($email); + + my $tmp_email = format_email($name, $address, $email_usename); + push_email_address($tmp_email, ''); + add_role($tmp_email, 'in file'); + } } if ($email || $email_list) { @@ -453,6 +472,7 @@ MAINTAINER field selection options: --remove-duplicates => minimize duplicate email names/addresses --roles => show roles (status:subsystem, git-signer, list, etc...) --rolestats => show roles and statistics (commits/total_commits, %) + --file-emails => add email addresses found in -f file (default: 0 (off)) --scm => print SCM tree(s) if any --status => print status if any --subsystem => print subsystem name if any @@ -811,7 +831,9 @@ sub add_role { foreach my $entry (@email_to) { if ($email_remove_duplicates) { my ($entry_name, $entry_address) = parse_email($entry->[0]); - if ($name eq $entry_name || $address eq $entry_address) { + if (($name eq $entry_name || $address eq $entry_address) + && ($role eq "" || !($entry->[1] =~ m/$role/)) + ) { if ($entry->[1] eq "") { $entry->[1] = "$role"; } else { @@ -819,7 +841,9 @@ sub add_role { } } } else { - if ($email eq $entry->[0]) { + if ($email eq $entry->[0] + && ($role eq "" || !($entry->[1] =~ m/$role/)) + ) { if ($entry->[1] eq "") { $entry->[1] = "$role"; } else { @@ -1099,6 +1123,51 @@ sub sort_and_uniq { return @parms; } +sub clean_file_emails { + my (@file_emails) = @_; + my @fmt_emails = (); + + foreach my $email (@file_emails) { + $email =~ s/[\(\<\{]{0,1}([A-Za-z0-9_\.\+-]+\@[A-Za-z0-9\.-]+)[\)\>\}]{0,1}/\<$1\>/g; + my ($name, $address) = parse_email($email); + if ($name eq '"[,\.]"') { + $name = ""; + } + + my @nw = split(/[^A-Za-zÀ-ÿ\'\,\.\+-]/, $name); + if (@nw > 2) { + my $first = $nw[@nw - 3]; + my $middle = $nw[@nw - 2]; + my $last = $nw[@nw - 1]; + + if (((length($first) == 1 && $first =~ m/[A-Za-z]/) || + (length($first) == 2 && substr($first, -1) eq ".")) || + 
(length($middle) == 1 || + (length($middle) == 2 && substr($middle, -1) eq "."))) { + $name = "$first $middle $last"; + } else { + $name = "$middle $last"; + } + } + + if (substr($name, -1) =~ /[,\.]/) { + $name = substr($name, 0, length($name) - 1); + } elsif (substr($name, -2) =~ /[,\.]"/) { + $name = substr($name, 0, length($name) - 2) . '"'; + } + + if (substr($name, 0, 1) =~ /[,\.]/) { + $name = substr($name, 1, length($name) - 1); + } elsif (substr($name, 0, 2) =~ /"[,\.]/) { + $name = '"' . substr($name, 2, length($name) - 2); + } + + my $fmt_email = format_email($name, $address, $email_usename); + push(@fmt_emails, $fmt_email); + } + return @fmt_emails; +} + sub merge_email { my @lines; my %saw; -- cgit v1.2.3 From 4b76c9da611593eed6a13527c5ebd00c173624ad Mon Sep 17 00:00:00 2001 From: Joe Perches Date: Fri, 5 Mar 2010 13:43:03 -0800 Subject: scripts/get_maintainer.pl: add --sections, print entire matched subsystem Print the complete contents of the matched subsystems in pattern match depth order. 
Sample output: $ ./scripts/get_maintainer.pl --sections -f drivers/net/usb/smsc95xx.c USB SMSC95XX ETHERNET DRIVER M:Steve Glendinning L:netdev@vger.kernel.org S:Supported F:drivers/net/usb/smsc95xx.* USB SUBSYSTEM M:Greg Kroah-Hartman L:linux-usb@vger.kernel.org W:http://www.linux-usb.org T:quilt kernel.org/pub/linux/kernel/people/gregkh/gregkh-2.6/ S:Supported F:Documentation/usb/ F:drivers/net/usb/ F:drivers/usb/ F:include/linux/usb.h F:include/linux/usb/ NETWORKING DRIVERS L:netdev@vger.kernel.org W:http://www.linuxfoundation.org/en/Net T:git git://git.kernel.org/pub/scm/linux/kernel/git/davem/net-2.6.git S:Odd Fixes F:drivers/net/ F:include/linux/if_* F:include/linux/*device.h THE REST M:Linus Torvalds L:linux-kernel@vger.kernel.org Q:http://patchwork.kernel.org/project/LKML/list/ T:git git://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux-2.6.git S:Buried alive in reporters F:* F:*/ Signed-off-by: Joe Perches Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- scripts/get_maintainer.pl | 36 ++++++++++++++++++++++++++++++++---- 1 file changed, 32 insertions(+), 4 deletions(-) (limited to 'scripts/get_maintainer.pl') diff --git a/scripts/get_maintainer.pl b/scripts/get_maintainer.pl index bff2390652c..e54f72f6c34 100755 --- a/scripts/get_maintainer.pl +++ b/scripts/get_maintainer.pl @@ -41,6 +41,7 @@ my $web = 0; my $subsystem = 0; my $status = 0; my $keywords = 1; +my $sections = 0; my $file_emails = 0; my $from_filename = 0; my $pattern_depth = 0; @@ -121,6 +122,7 @@ if (!GetOptions( 'web!' => \$web, 'pattern-depth=i' => \$pattern_depth, 'k|keywords!' => \$keywords, + 'sections!' => \$sections, 'fe|file-emails!' 
=> \$file_emails, 'f|file' => \$from_filename, 'v|version' => \$version, @@ -152,10 +154,20 @@ if ($output_rolestats) { $output_roles = 1; } -my $selections = $email + $scm + $status + $subsystem + $web; -if ($selections == 0) { - usage(); - die "$P: Missing required option: email, scm, status, subsystem or web\n"; +if ($sections) { + $email = 0; + $email_list = 0; + $scm = 0; + $status = 0; + $subsystem = 0; + $web = 0; + $keywords = 0; +} else { + my $selections = $email + $scm + $status + $subsystem + $web; + if ($selections == 0) { + usage(); + die "$P: Missing required option: email, scm, status, subsystem or web\n"; + } } if ($email && @@ -357,6 +369,21 @@ foreach my $file (@files) { foreach my $line (sort {$hash{$b} <=> $hash{$a}} keys %hash) { add_categories($line); + if ($sections) { + my $i; + my $start = find_starting_index($line); + my $end = find_ending_index($line); + for ($i = $start; $i < $end; $i++) { + my $line = $typevalue[$i]; + if ($line =~ /^[FX]:/) { ##Restore file patterns + $line =~ s/([^\\])\.([^\*])/$1\?$2/g; + $line =~ s/([^\\])\.$/$1\?/g; ##Convert . back to ? + $line =~ s/\\\./\./g; ##Convert \. to . 
+ $line =~ s/\.\*/\*/g; ##Convert .* to * + } + print("$line\n"); + } + } } if ($email && $email_git) { @@ -486,6 +513,7 @@ Output type options: Other options: --pattern-depth => Number of pattern directory traversals (default: 0 (all)) --keywords => scan patch for keywords (default: 1 (on)) + --sections => print the entire subsystem sections with pattern matches --version => show version --help => show this help information -- cgit v1.2.3 From f11e9a1534c5e9dd4be97b30e6b24902e0ec325b Mon Sep 17 00:00:00 2001 From: Joe Perches Date: Fri, 5 Mar 2010 13:43:03 -0800 Subject: scripts/get_maintainer.pl: change --sections to print in the same style as MAINTAINERS Signed-off-by: Joe Perches Cc: Stefan Richter Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- scripts/get_maintainer.pl | 2 ++ 1 file changed, 2 insertions(+) (limited to 'scripts/get_maintainer.pl') diff --git a/scripts/get_maintainer.pl b/scripts/get_maintainer.pl index e54f72f6c34..4cd83fae87c 100755 --- a/scripts/get_maintainer.pl +++ b/scripts/get_maintainer.pl @@ -381,8 +381,10 @@ foreach my $file (@files) { $line =~ s/\\\./\./g; ##Convert \. to . $line =~ s/\.\*/\*/g; ##Convert .* to * } + $line =~ s/^([A-Z]):/$1:\t/g; print("$line\n"); } + print("\n"); } } -- cgit v1.2.3 From 64f77f312b15f101bf6c4c65d5359ccc16e3f82b Mon Sep 17 00:00:00 2001 From: Joe Perches Date: Fri, 5 Mar 2010 13:43:04 -0800 Subject: scripts/get_maintainer.pl: add ability to read from STDIN Doesn't need or accept '-' as a trailing option to read stdin. Doesn't print usage() after bad options. 
Adds --usage as command line equivalent of --help Suggested-by: Borislav Petkov Signed-off-by: Joe Perches Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- scripts/get_maintainer.pl | 26 +++++++++++++++----------- 1 file changed, 15 insertions(+), 11 deletions(-) (limited to 'scripts/get_maintainer.pl') diff --git a/scripts/get_maintainer.pl b/scripts/get_maintainer.pl index 4cd83fae87c..f8baeeb8c3f 100755 --- a/scripts/get_maintainer.pl +++ b/scripts/get_maintainer.pl @@ -126,7 +126,7 @@ if (!GetOptions( 'fe|file-emails!' => \$file_emails, 'f|file' => \$from_filename, 'v|version' => \$version, - 'h|help' => \$help, + 'h|help|usage' => \$help, )) { die "$P: invalid argument - use --help if necessary\n"; } @@ -141,9 +141,9 @@ if ($version != 0) { exit 0; } -if ($#ARGV < 0) { - usage(); - die "$P: argument missing: patchfile or -f file please\n"; +if (-t STDIN && !@ARGV) { + # We're talking to a terminal, but have no command line arguments. + die "$P: missing patchfile or -f file - use --help if necessary\n"; } if ($output_separator ne ", ") { @@ -165,7 +165,6 @@ if ($sections) { } else { my $selections = $email + $scm + $status + $subsystem + $web; if ($selections == 0) { - usage(); die "$P: Missing required option: email, scm, status, subsystem or web\n"; } } @@ -173,7 +172,6 @@ if ($sections) { if ($email && ($email_maintainer + $email_list + $email_subscriber_list + $email_git + $email_git_penguin_chiefs + $email_git_blame) == 0) { - usage(); die "$P: Please select at least 1 email option\n"; } @@ -248,12 +246,18 @@ my @range = (); my @keyword_tvi = (); my @file_emails = (); +if (!@ARGV) { + push(@ARGV, "&STDIN"); +} + foreach my $file (@ARGV) { - ##if $file is a directory and it lacks a trailing slash, add one - if ((-d $file)) { - $file =~ s@([^/])$@$1/@; - } elsif (!(-f $file)) { - die "$P: file '${file}' not found\n"; + if ($file ne "&STDIN") { + ##if $file is a directory and it lacks a trailing slash, add one + if ((-d $file)) { + $file =~ 
s@([^/])$@$1/@; + } elsif (!(-f $file)) { + die "$P: file '${file}' not found\n"; + } } if ($from_filename) { push(@files, $file); -- cgit v1.2.3 From 22dd5b0cba50a197aaa3bd2790a29ee2e8e4e372 Mon Sep 17 00:00:00 2001 From: Stephen Hemminger Date: Fri, 5 Mar 2010 13:43:06 -0800 Subject: get_maintainer: fix perlcritic warnings perlcritic is a standard checker for Perl Best Practices. This patch fixes most of the warnings in the get_maintainer script. If kernel programmers are going to have checkpatch they should write clean scripts as well... Bareword file handle opened at line 176, column 1. See pages 202,204 of PBP. (Severity: 5) Two-argument "open" used at line 176, column 1. See page 207 of PBP. (Severity: 5) Bareword file handle opened at line 207, column 5. See pages 202,204 of PBP. (Severity: 5) Two-argument "open" used at line 207, column 5. See page 207 of PBP. (Severity: 5) Bareword file handle opened at line 246, column 6. See pages 202,204 of PBP. (Severity: 5) Two-argument "open" used at line 246, column 6. See page 207 of PBP. (Severity: 5) Bareword file handle opened at line 258, column 2. See pages 202,204 of PBP. (Severity: 5) Two-argument "open" used at line 258, column 2. See page 207 of PBP. (Severity: 5) Expression form of "eval" at line 983, column 17. See page 161 of PBP. (Severity: 5) Expression form of "eval" at line 985, column 17. See page 161 of PBP. (Severity: 5) Subroutine prototypes used at line 1186, column 1. See page 194 of PBP. (Severity: 5) Subroutine prototypes used at line 1206, column 1. See page 194 of PBP. 
(Severity: 5) Signed-off-by: Stephen Hemminger Acked-by: Joe Perches Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- scripts/get_maintainer.pl | 34 ++++++++++++++++++++-------------- 1 file changed, 20 insertions(+), 14 deletions(-) (limited to 'scripts/get_maintainer.pl') diff --git a/scripts/get_maintainer.pl b/scripts/get_maintainer.pl index f8baeeb8c3f..9bb094138dd 100755 --- a/scripts/get_maintainer.pl +++ b/scripts/get_maintainer.pl @@ -185,8 +185,9 @@ if (!top_of_kernel_tree($lk_path)) { my @typevalue = (); my %keyword_hash; -open(MAINT, "<${lk_path}MAINTAINERS") || die "$P: Can't open MAINTAINERS\n"; -while () { +open (my $maint, '<', "${lk_path}MAINTAINERS") + or die "$P: Can't open MAINTAINERS: $!\n"; +while (<$maint>) { my $line = $_; if ($line =~ m/^(\C):\s*(.*)/) { @@ -211,13 +212,14 @@ while () { push(@typevalue, $line); } } -close(MAINT); +close($maint); my %mailmap; if ($email_remove_duplicates) { - open(MAILMAP, "<${lk_path}.mailmap") || warn "$P: Can't open .mailmap\n"; - while () { + open(my $mailmap, '<', "${lk_path}.mailmap") + or warn "$P: Can't open .mailmap: $!\n"; + while (<$mailmap>) { my $line = $_; next if ($line =~ m/^\s*#/); @@ -236,7 +238,7 @@ if ($email_remove_duplicates) { $mailmap{$name} = \@arr; } } - close(MAILMAP); + close($mailmap); } ## use the filenames on the command line or find the filenames in the patchfiles @@ -262,9 +264,10 @@ foreach my $file (@ARGV) { if ($from_filename) { push(@files, $file); if (-f $file && ($keywords || $file_emails)) { - open(FILE, "<$file") or die "$P: Can't open ${file}\n"; - my $text = do { local($/) ; }; - close(FILE); + open(my $f, '<', $file) + or die "$P: Can't open $file: $!\n"; + my $text = do { local($/) ; <$f> }; + close($f); if ($keywords) { foreach my $line (keys %keyword_hash) { if ($text =~ m/$keyword_hash{$line}/x) { @@ -280,8 +283,10 @@ foreach my $file (@ARGV) { } else { my $file_cnt = @files; my $lastfile; - open(PATCH, "<$file") or die "$P: Can't open ${file}\n"; 
- while () { + + open(my $patch, '<', $file) + or die "$P: Can't open $file: $!\n"; + while (<$patch>) { my $patch_line = $_; if (m/^\+\+\+\s+(\S+)/) { my $filename = $1; @@ -301,7 +306,8 @@ foreach my $file (@ARGV) { } } } - close(PATCH); + close($patch); + if ($file_cnt == @files) { warn "$P: file '${file}' doesn't appear to be a patch. " . "Add -f to options?\n"; @@ -1286,7 +1292,7 @@ sub rfc822_strip_comments { # valid: returns true if the parameter is an RFC822 valid address # -sub rfc822_valid ($) { +sub rfc822_valid { my $s = rfc822_strip_comments(shift); if (!$rfc822re) { @@ -1306,7 +1312,7 @@ sub rfc822_valid ($) { # from success with no addresses found, because an empty string is # a valid list. -sub rfc822_validlist ($) { +sub rfc822_validlist { my $s = rfc822_strip_comments(shift); if (!$rfc822re) { -- cgit v1.2.3 From a63ceb4c36a7674f7efa90e8ba96b44a3989d717 Mon Sep 17 00:00:00 2001 From: Stephen Hemminger Date: Fri, 5 Mar 2010 13:43:06 -0800 Subject: get_maintainer: quote email address with period Picky mail systems won't accept email addresses where recipient has period in name; ie. David S. Miller will not work. Signed-off-by: Stephen Hemminger Acked-by: Joe Perches Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- scripts/get_maintainer.pl | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'scripts/get_maintainer.pl') diff --git a/scripts/get_maintainer.pl b/scripts/get_maintainer.pl index 9bb094138dd..b61002dceab 100755 --- a/scripts/get_maintainer.pl +++ b/scripts/get_maintainer.pl @@ -605,7 +605,7 @@ sub parse_email { $name =~ s/^\"|\"$//g; $address =~ s/^\s+|\s+$//g; - if ($name =~ /[^a-z0-9 \.\-]/i) { ##has "must quote" chars + if ($name =~ /[^\w \-]/i) { ##has "must quote" chars $name =~ s/(? Date: Fri, 5 Mar 2010 13:43:07 -0800 Subject: scripts/get_maintainer.pl: fix possible infinite loop If MAINTAINERS section entries are misformatted, it was possible to have an infinite loop. 
Correct the defect by always moving the index to the end of section + 1 Also, exit check for exclude as soon as possible. Signed-off-by: Joe Perches Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- scripts/get_maintainer.pl | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'scripts/get_maintainer.pl') diff --git a/scripts/get_maintainer.pl b/scripts/get_maintainer.pl index b61002dceab..f76f3d13276 100755 --- a/scripts/get_maintainer.pl +++ b/scripts/get_maintainer.pl @@ -347,6 +347,7 @@ foreach my $file (@files) { if ($type eq 'X') { if (file_match_pattern($file, $value)) { $exclude = 1; + last; } } } @@ -373,8 +374,7 @@ foreach my $file (@files) { } } - $tvi += ($end - $start); - + $tvi = $end + 1; } foreach my $line (sort {$hash{$b} <=> $hash{$a}} keys %hash) { -- cgit v1.2.3 From 3a4df13d2420ae1998e5c7d26275f8714e84da30 Mon Sep 17 00:00:00 2001 From: Wolfram Sang Date: Tue, 23 Mar 2010 13:35:18 -0700 Subject: get_maintainer: repair STDIN usage Commit 22dd5b0cba50a197aaa3bd2790a29ee2e8e4e372 (fix perlcritic warnings) broke the ability to handle STDIN because the three argument version of open() cannot handle standard IO-streams (which is mentioned in PerlBestPractices, too). Signed-off-by: Wolfram Sang Cc: Stephen Hemminger Acked-by: Joe Perches Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- scripts/get_maintainer.pl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'scripts/get_maintainer.pl') diff --git a/scripts/get_maintainer.pl b/scripts/get_maintainer.pl index f76f3d13276..6f97a13bcee 100755 --- a/scripts/get_maintainer.pl +++ b/scripts/get_maintainer.pl @@ -284,7 +284,7 @@ foreach my $file (@ARGV) { my $file_cnt = @files; my $lastfile; - open(my $patch, '<', $file) + open(my $patch, "< $file") or die "$P: Can't open $file: $!\n"; while (<$patch>) { my $patch_line = $_; -- cgit v1.2.3