root/swish_website/Plugin/My/POD.pm

Revision 1818, 14.3 kB (checked in by whmoseley, 2 years ago)

First pass at trying to get this working again.
Seems like something is broken because the @links
array is empty after generating the html files,
and frankly, understanding all the magic in this code is
slow.

  • Property svn:eol-style set to native
  • Property svn:keywords set to Author Date Id Revision
Line 
1 package My::POD;
2 use strict;
3 use Pod::POM;
4 use Pod::POM::View::HTML;
5 use base 'Template::Plugin';
6 use vars qw( $stash );  # for debugging links
7
8 if ( my $ file = shift ) {
9     warn "Testing links\n";
10     My::pod::test();
11 }
12
13
14 my $view_mode = 'Pod::POM::View::HTML'# The view module
15
16 # *Much* of this is based on (or copied from) Stas' DocSet 0.17
17 # This takes a pod file and uses Pod::POM to split it into sections, builds a table
18 # of contents and generates the HTML.
19 #
20 # Will also cache the page's OVERVIEW
21 #
22 # I'm not sure why spaces need to be removed from links.  Pod::POM doesn't remove them.
23 # Escaping of links and hrefs and name tags needs to be checked.  There was just a discussion
24 # on the TT list about escaping hrefs.  Then the xhtml validation rejected % and spaces,
25 # so no replace all non-word chars with an underscore.
26 #
27 # Todo:
28 #   might be nice to cache links to make sure there's no duplicates
29 #   and also need a way to cross validate links (but can use an external link checker)
30
31 sub new {
32     my ( $class, $context, $content ) = @_;
33
34     $stash = $context->stash;
35
36     my $template_name = $stash->get( 'template.name' );
37
38     # Enable output of debugging messages from Pod::POM
39     my $warn = sub { warn "[$template_name]: @_\n" }
40         if $stash->get( 'self.config.verbose' );
41
42     my $parser = Pod::POM->new( warn => $warn );
43
44
45     # Parse the pod into a Pod::POM tree
46     my $pom = $parser->parse_text( $content );
47
48
49     # Merge sequential verbatim (<pre>) sections into one
50     combine_verbatim_sections_hack( $pom );
51
52
53     my @sections = $pom->head1; # get pod into sections
54
55     # Get title of document and shift off first NAME section
56
57     my $doc_title;
58     if ( @sections && $sections[0]->title =~ /NAME/ ) {
59         $doc_title = ( shift @sections )->content;
60         $doc_title =~ s/^\s*|\s*$//sg;  # strip;
61     }
62
63
64
65     # Cache this page and overview for creating a table of contents page
66     # the bin/build script passes in the hash reference
67
68     if ( my $cache = $stash->get( 'toc_cache' ) ) {
69         $cache->{ $template_name } = {
70             page    => $stash->get( 'page.id' ),
71             title   => $doc_title,
72             abstract => fetch_abstract( \@sections ),
73         };
74     };
75
76
77     # Structure for returning info to template
78     # the "toc" is the table of contents for the top of the page
79     # the "podparts" is the document broken into sequential parts by
80     # =head*  This allows the template to wrap each section in a <div> and
81     # therefore makes indexing each section with swish-e easy -- allows targeted searches
82
83     my %data = (
84         pom         => $pom,
85         view        => 'My::Pod::View::HTML',
86         toc         => fetch_toc( @sections ),
87         podparts    => [ slice_by_head(@sections) ], # Must be lasts because modifies tree
88         title       => $doc_title,
89     );
90
91
92
93     return \%data;
94 }
95
96
97
98 #-----------------------------------------------------------------------------
99 # This function is modified from Stas' DocSet
100 # It flattens the recursive structure of the tree into a list of sections by =head
101 #
102 # Pass in:
103 #   @sections = list of head1 sections
104 #
105 # Returns:
106 #   flat list of sections for each headX section
107 #
108 # Note:
109 #   This MODIFIES the original tree!  Should probably clone and build a new tree.
110 #
111 # This is useful for printing and wrapping each headX section in, say, <div>
112
113 sub slice_by_head {
114     my @sections = @_;  # these are the sections at the current level.
115     my @body = ();
116
117     for my $node (@sections) {
118         my @next = ();
119         # assumption, after the first 'headX' section, there can only
120         # be other 'headX' sections
121
122         # Look for first =headX seciton and chop of the content there
123         # then process that new =headX section again
124
125         my $id = -1;
126         for ($node->content) {
127             $id++;
128             # Keep content until a =head section is found.  Then end current content
129             # section at that point.
130
131             next unless $_->type =~ /^head\d$/;
132
133             # Modify original content to just include the nodes up to the first =head
134             @next = splice @{$node->content}, $id;
135             last;
136         }
137
138         # combine all =head nodes (flatten)
139         push @body, $node, slice_by_head(@next);
140     }
141     return @body;
142 }
143
144 #---------------------------------------------------------------------------
145 # Merge verbatim sequential <pre> sections together
146
147 sub combine_verbatim_sections_hack {
148     my $tree = shift;
149     my @content = $tree->content;
150     my $size = @content - 1;
151
152     my $x = 0;
153     while ( $x <= $size ) {
154
155         my $item = $content[$x];
156
157         my $type = $item->type;
158         while ( $type eq 'verbatim' && $x < $size && $type eq $content[$x+1]->type ) {
159             $item->{text} .= "\n\n" . $content[$x+1]->text;
160             $content[$x+1]->{text} = '';
161             $x++;
162         }
163         combine_verbatim_sections_hack($item);
164         $x++;
165     }
166 }
167
168
169
170 #----------------------------------------------------------------------------
171 # Grabs the first paragraph from either DESCRIPTION or OVERVIEW -- whichever comes first
172
173
174 sub fetch_abstract {
175     my ( $sections ) = @_;
176     for ( 0 .. 2 ) {
177         next unless $sections->[$_] && $sections->[$_]->title =~ /DESCRIPTION|OVERVIEW/;
178
179         my $abstract = $sections->[$_]->content->present($view_mode);
180         $abstract =~ s|<p>(.*?)</p>.*|$1|s;
181         return $abstract;
182     }
183 }
184
185
186 #---------------------------------------------------------------------------------
187 # Walk tree building up a TOC
188
189 sub fetch_toc {
190     my ( @sections ) = @_;
191
192     my @toc = ();
193     my $level = 1;
194     for my $node (@sections) {
195         push @toc, render_toc_level($node, $level);
196     }
197     return \@toc;
198 }
199
200 # From DocSet POD.pm
201
202 sub render_toc_level {
203     my( $node, $level) = @_;
204     my $title = $node->title->present($view_mode);
205     my $anchor = My::Pod::View::HTML->escape_name( $title );
206
207     my %toc_entry = (
208         title    => $title,
209         link     => "#$anchor",
210     );
211
212     my @sub = ();
213     $level++;
214     if ($level <= 4) {
215         # if there are deeper than =head4 levels we don't go down (spec is 1-4)
216         my $method = "head$level";
217         for my $sub_node ($node->$method()) {
218             push @sub, render_toc_level($sub_node, $level);
219         }
220     }
221     $toc_entry{subs} = \@sub if @sub;
222
223     return \%toc_entry;
224 }
225
226
227
228 #---------------- Package to override Pod::POM::View::HTML ----------------
229
230 package My::Pod::View::HTML;
231 use strict;
232 use warnings;
233 use base 'Pod::POM::View::HTML';
234 use HTML::Entities;
235 use Template::Filters;
236 use File::Basename;
237
238 use vars qw( %targets @links );
239
240
241 #-------------- For checking that all pod links work ----------------
242 sub save_link {
243     my ($self, $href) = @_;
244
245
246     my $out_file = basename( $My::POD::stash->get( 'this.out_file' ) );  # for debugging links
247
248
249     push @links, {
250         href => $href,
251         on_page => $out_file,
252     };
253 }
254
255 sub save_target {
256     my ($self, $target) = @_;
257     my $out_file = basename( $My::POD::stash->get( 'this.out_file' ) );
258     $targets{$out_file}{$target}++;
259 }
260
261 sub validate_links {
262     # Maybe this was some hack to get at the config??
263     #my $all_files_processed = $My::POD::stash->get( 'self.config.all');
264     my $config = shift;
265     my $all_files_processed = $config->{all};
266
267
268     for my $link ( @links ) {
269         my ($name, $fragment) = split /#/, $link->{href}, 2;
270
271
272         # If processing all files then can check for page links
273         if ( $all_files_processed ) {
274             if ( $name && !$targets{$name} ) {
275                 warn "Link to page [$name] from $link->{on_page} could not be resolved\n";
276                 next;
277             }
278         } else {
279             next if $name && $name ne $link->{on_page};  # can't check (unless links are cached in the future
280         }
281
282         $name ||= $link->{on_page};  # default to current page;
283
284         # Check page link
285         unless ( $targets{ $name } ) {
286             warn "Link on page $link->{on_page} to page $name is unresolved\n";
287             next;
288         }
289         next unless $fragment;
290
291         # And check frag if one
292         unless ( $targets{ $name}{ $fragment } ) {
293             warn "Link [$link->{href}] on page $link->{on_page} could not be resolved\n";
294         } else {
295             warn "Link $link->{href} on page $link->{on_page} has multiple targets\n"
296                 if $targets{$name}{$fragment} > 1;
297         }
298
299
300     }
301
302     @links = ();
303     %targets = ();
304 }
305
306
307 #---------------- Override Pod::POM ---------------------------------
308
309 sub view_head1 { shift->show_head( @_ ) };
310 sub view_head2 { shift->show_head( @_ ) };
311 sub view_head3 { shift->show_head( @_ ) };
312 sub view_head4 { shift->show_head( @_ ) };
313
314 sub show_head {  # all all child nodes
315     my ($self, $head) = @_;
316     my $level = substr( $head->type, -1 );
317     return "\n<h$level>" . $self->anchor($head->title) . "</h$level>\n\n" .
318         $head->content->present($self);
319 }
320
321
322 #---------------- <pre> sections ----------------------------
323 # This just adds a class attribute
324 sub view_verbatim {
325     my ($self, $text) = @_;
326     return '' unless $text;
327     for ($text) {
328         s/&/&amp;/g;
329         s/</&lt;/g;
330         s/>/&gt;/g;
331     }
332
333     return qq{<pre class="pre-section">$text</pre>\n};
334 }
335
336
337
338
339 #----------------- <a name="foo"></a>foo ---------------------
340 sub anchor {
341     my($self, $title) = @_;
342     my $text = $self->escape_name( $title->present($self) );
343     my $t = Template::Filters::html_filter( $title );
344     $self->save_target( $text );
345     return qq[<a name="$text"></a>$t];
346 }
347
348 #----------------- prepare text for name="" --------------------
349 # This first unescapes any entities, and then removes all non-word chars
350 # and finally returns all lower case;
351
352
353 sub escape_name {
354     my ($self, $text) = @_;
355
356     # Comes in already HTML escaped -- well, kind of, quotes are not escaped.
357     my $plain = decode_entities( $text );
358
359     # Not much is allowed in the name="" attribute, so just hack away:
360     $plain =~ s/\W+/_/g;
361     return lc $plain;
362 }
363
364
365 #----------------- Fixup L<> links in pod -------------------------------
366
367 # Thes first two fixup L<> links.
368 # Pod::POM doesn't provide a way to get at the fragment for escaping,
369 # so let Pod::POM deal with it and then update
370
371 sub view_seq_link {
372     my $self = shift;
373     my $url = $self->SUPER::view_seq_link( @_ );
374     return unless $url;
375
376     return unless $url =~ /href="(.+)(?=">)/;
377     my ( $href, $fragment ) = split /#/, $1, 2;
378
379     $href = '' unless defined $href;
380     $href .= '#'. $self->escape_name( $fragment ) if defined $fragment;
381
382     $self->save_link( $href );  # for checking that they all go some place.
383
384     # Now replace the href into the original string
385
386     $url =~ s/href=".+(?=">)/href="$href/;
387
388     return $url;
389 }
390
391
392 # This allows fixing up links to *other* pages.  In our case, they are mostly to
393 # other pod pages, which are now lower case and end in .html.
394
395
396 sub view_seq_link_transform_path {
397     my ( $self, $link ) = @_;
398     return lc $link . '.html';
399 }
400
401
402 # Modified version of item display that removes the item_ prefix and only takes the first
403 # word
404 #
405 # Oh this won't work everywhere.  The problem is we have things like:
406 #
407 # =item * UndefinedMetaTags [error|ignore|INDEX|auto]
408 #
409 # so we take only the first word.  But that breaks if linking to multi-word item.
410 #
411 # $$$ fix me -- move to using item_ -- fix links in source.
412
413 sub view_item {
414     my ($self, $item) = @_;
415
416     my $title = $item->title();
417
418     if (defined $title) {
419         $title = $title->present($self) if ref $title;
420
421         $title =~ s/^\*\s*//;  # Remove leading bullet
422
423         if (length $title) {
424             my $anchor = $title;
425             $anchor =~ s/\s+.*$//;  # strip all trailing stuff from first space on
426             $anchor = $self->escape_name( $anchor );
427
428             $self->save_target( $anchor );
429             $self->save_target( "item_$anchor" );
430             $title = qq{<a name="item_$anchor"></a><a name="$anchor"></a><b>$title</b>};
431         }
432     }
433
434     return '<li>'
435         . "$title\n"
436         . $item->content->present($self)
437         . "</li>\n";
438 }
439
440
441 package My::pod;
442 use strict;
443 use warnings;
444
445 sub test {
446
447     # From perldoc perlpod:
448     #
449     my @l_tests = (
450         {
451             in  => ['L<name>'],
452             out => '<a href="name.html">name</a>',
453         },
454
455         {
456             in  => [ 'L<name/"section here">', 'L<name/section here>', ],
457             out => '<a href="name.html#section_here">section here</a>',
458         },
459
460         {
461             in  => ['L</"section here">', 'L</section here>', 'L<"section here">', ],
462             out => '<a href="#section_here">section here</a>',
463         },
464
465
466
467         {   in  => ['L<text here|name>'],
468             out => '<a href="name.html">text here</a>',
469         },
470
471         {
472             in  => ['L<text|name/"sec here">', 'L<text|name/sec here>'],
473             out => '<a href="name.html#sec_here">text</a>',
474         },
475
476         {
477             in  => ['L<text|/"sec">''L<text|/sec>', 'L<text|"sec">'],
478             out => '<a href="#sec">text</a>',
479         },
480
481         {
482             in  => ['L<http://host.name/some/path.html#fragment>'],
483             out => '<a href="http://host.name/some/path.html#fragment">http://host.name/some/path.html#fragment</a>',
484         },
485
486         {
487             in => ['L<Link to $foo-E<gt> with & "quotes" and %funny !!? chars>'],
488             out => '<a href="#link_to_foo_with_quotes_and_funny_chars">Link to $foo-&gt; with &amp; "quotes" and %funny !!? chars</a>',
489         },
490         {
491             in => ['L<Some & text|/Link to $foo-E<gt> with & "quotes" and %funny !!? chars>'],
492             out => '<a href="#link_to_foo_with_quotes_and_funny_chars">Some &amp; text</a>',
493         },
494         {
495             in => ['L<SWISH-RUN|SWISH-RUN>'],
496             out => '<a href="swish-run.html">SWISH-RUN</a>',
497         }
498     );
499
500
501     for my $test ( @l_tests ) {
502         for my $input ( @{$test->{in}} ) {
503
504             my $pod = Pod::POM->new( warn => 1 )->parse_text( "=head1 Title\n\n$input" );
505             my $out = My::Pod::Test::HTML->print( $pod );
506
507             warn "input  [$input]\n",
508                  "output [$out]\n",
509                  "test   [$test->{out}]\n\n"
510                         if $out ne $test->{out} || $ENV{VERBOSE};
511         }
512     }
513 }
514
515 package My::Pod::Test::HTML;
516 use strict;
517 use warnings;
518 use base 'My::Pod::View::HTML';
519
520 sub view_textblock {
521     my ($self, $text) = @_;
522     return $text;
523 }
524
525 sub view_pod {
526     my ($self, $pod) = @_;
527     return $pod->content->present($self);
528 }
529
530 sub view_head1 {
531     my ($self, $pod) = @_;
532     return $pod->content->present($self);
533 }
534
535
536 1;
537
Note: See TracBrowser for help on using the browser.