The Code
In this example, the Perl
script will use Acrobat to read annotation (e.g., sticky notes) data
from the currently open PDF. The script will format this data using
HTML and then output it to stdout.
Copy the script in Example 1 into a file named
SummarizeComments.pl. You can download this code
from http://www.pdfhacks.com/summarize/.
Example 1. Perl code for summarizing comments
# SummarizeComments.pl ver. 1.0
#
# Summarizes the annotations (e.g., sticky notes) of the PDF that is
# currently active in Acrobat. The summary is written to stdout as an
# HTML document; diagnostics are written to stderr.
use strict;
use warnings;
use Win32::OLE;

# Escape the characters that are special in HTML so that annotation text
# is shown literally instead of being interpreted as markup.
# Takes one string (undef is treated as empty) and returns the escaped copy.
sub html_escape {
    my ($text) = @_;
    return '' unless defined $text;
    $text =~ s/&/&amp;/g;    # must run first, or the entities below get re-escaped
    $text =~ s/</&lt;/g;
    $text =~ s/>/&gt;/g;
    $text =~ s/"/&quot;/g;
    return $text;
}

# Fail with a readable message instead of "Can't call method on an
# undefined value" when the Acrobat OLE server cannot be created.
my $app = Win32::OLE->new("AcroExch.App")
    or die "Could not create AcroExch.App: " . Win32::OLE->LastError() . "\n";

if ( 0 < ( $app->GetNumAVDocs || 0 ) ) {    # a PDF is open in Acrobat

    # get the active PDF and drill down to its PDDoc; do this before any
    # output so a failure does not leave a half-written HTML document
    my $avdoc = $app->GetActiveDoc
        or die "Could not get the active document from Acrobat\n";
    my $pddoc = $avdoc->GetPDDoc
        or die "Could not get the PDDoc of the active document\n";

    # open the HTML document
    print "<html>\n<head>\n<title>PDF Comments Summary</title>\n</head>\n<body>\n";
    my $found_notes_b = 0;

    # iterate over pages
    my $num_pages = $pddoc->GetNumPages || 0;
    for ( my $ii = 0; $ii < $num_pages; ++$ii ) {
        my $pdpage = $pddoc->AcquirePage($ii);
        next unless $pdpage;

        # iterate over annotations (e.g., sticky notes)
        my $page_head_b = 0;
        my $num_annots  = $pdpage->GetNumAnnots || 0;
        for ( my $jj = 0; $jj < $num_annots; ++$jj ) {
            my $annot = $pdpage->GetAnnot($jj);
            next unless $annot;

            # fetch the contents once; skip annotations without text
            my $comment = $annot->GetContents;
            next unless defined $comment and $comment ne '';

            # Pop-up annots give us duplicate contents
            my $subtype = $annot->GetSubtype;
            next if defined $subtype and $subtype eq 'Popup';

            if ( !$page_head_b ) {    # output the page number (1-based)
                print "<h2>Page: " . ( $ii + 1 ) . "</h2>\n";
                $page_head_b = 1;
            }

            # output the annotation title and format it a little
            my $title = $annot->GetTitle;
            print "<p><i>" . html_escape($title) . "</i></p>\n";

            # output the note text; escape it first, then replace carriage
            # returns (optionally followed by a line feed) with paragraph
            # breaks so the inserted tags are not escaped themselves
            $comment = html_escape($comment);
            $comment =~ s{\r\n?}{</p>\n<p>}g;
            print "<p>" . $comment . "</p>\n";
            $found_notes_b = 1;
        }
    }

    if ( !$found_notes_b ) {
        print "<h3>No Notes Found in PDF</h3>\n";
    }

    # close the HTML document
    print "</body>\n</html>\n";
}
else {
    # stdout stays empty, as before; only explain why on stderr
    warn "No PDF is open in Acrobat; nothing to summarize\n";
}