root/swish_website/bin/split.pl

Revision 1593, 1.7 kB (checked in by whmoseley, 4 years ago)

Add initial code for indexing site and docs.

Meta name "section" is used to limit to a section. Search for "defaultcontents"

defaultcontents section=docs
defaultcontents section=devel ( only devel docs)
defaultcontents section=website ( only web site pages, excluding docs and devel_docs )

  • Property svn:eol-style set to native
  • Property svn:executable set to *
  • Property svn:keywords set to Author Date Id Revision
Line 
1 #!/usr/bin/perl -w
2 use strict;
3 use warnings;
4 use File::Find;
5
# Target to index: the first command-line argument.  It may name a single
# file or the top of a directory tree; the script aborts when it is missing.
my $dir = shift @ARGV;
die "failed to specify directory" unless $dir;

# Matches one section heading of the form <hN><a name="ANCHOR"></a>TEXT</hN>
# and captures the anchor name and the heading text.  index_doc() splits
# each document on this pattern.
my $pat = qr!<h\d><a name="([^"]+)"></a>([^<]+)</h\d>!i; #" for vim

if ( !-f $dir ) {
    # Not a plain file: walk the tree and let process_doc() pick the pages.
    find( \&process_doc, $dir );
}
else {
    # A plain file is indexed on its own and the script stops right after.
    warn "Indexing [$dir] as a single file\n";
    index_path( $dir );
    exit;
}
17
18 # swish-e -S program to index the HTML docs in sections.
19
20
21
22
23
# File::Find "wanted" callback: passes every indexable .html file on to
# index_path().
#
# File::Find provides the current entry through globals rather than @_:
#   $_                 - base name of the entry (find() has chdir'ed into
#                        its directory, so file tests on $_ work)
#   $File::Find::name  - path of the entry including the start directory
#   $File::Find::dir   - directory currently being visited
#
# Returns nothing for skipped entries; otherwise whatever index_path()
# returns (File::Find ignores the value).
sub process_doc {
    my $path = $File::Find::name;
    my $dir  = $File::Find::dir;

    return if -d;                  # directories are traversed, not indexed

    # Only HTML pages are indexed.  This also rules out style sheets and
    # the like, so no separate test for other extensions is needed.
    return unless /\.html$/;

    return if /^\./;               # dot-files
    return if /robots\.txt/;       # e.g. a page named "robots.txt.html"

    # Skip anything whose directory path contains /search or /graphics.
    return if $dir =~ m!/search!;
    return if $dir =~ m!/graphics!;

    index_path( $path );
}
43
# Reads the whole file at $path and hands its contents to index_doc().
#
#   $path - name of the file to read
#
# A file that cannot be opened is reported with warn() and skipped (returns
# nothing).  Otherwise the return value is whatever index_doc() returns.
sub index_path {
    my ( $path ) = @_;

    # Three-argument open with a lexical handle: the name is used literally
    # (two-argument open would strip surrounding whitespace and interpret
    # mode characters), and the handle is private to this call.
    my $fh;
    unless ( open( $fh, '<', $path ) ) {
        warn "Failed to open file - [$path]: $!\n";
        return;
    }

    # Slurp the file in one read; $/ is undefined only inside this block.
    my $doc = do { local $/; <$fh> };
    close $fh;

    index_doc( $path, $doc );
}
55
# Splits one HTML document into sections at its anchored headings and calls
# output() once per section.
#
#   $name - path of the document
#   $doc  - complete text of the document
#
# Dies when there is no document text at all.  A document without any
# matching heading produces no output.
sub index_doc {
    my ($name, $doc) = @_;

    die "No content to index in [$name]"
        unless defined $doc && length $doc;

    # $pat has two captures, so split returns the text before the first
    # heading followed by (anchor, heading text, body) triples.  The -1
    # limit keeps a trailing empty body: without it a document that ends
    # right after a heading would yield an undefined body for that section.
    my @sections = split /$pat/, $doc, -1;

    # Get rid of the part before the first heading
    shift @sections;

    # Page title, with a fallback and without the site-wide prefix.
    my ( $title ) = $doc =~ m[<title>([^<]+)]is;
    $title ||= "Swish-e Documentation";
    $title =~ s/^Swish-e :: //;

    while ( @sections ) {
        my ( $section, $sec_text, $text ) = splice( @sections, 0, 3 );
        output( $name, $section, $sec_text, $text, $title );
    }

}
# Prints one section as a small stand-alone HTML document, preceded by a
# header block and a blank line, to the currently selected output handle.
#
#   $name     - path of the source document (also used for the mtime)
#   $section  - anchor name of the section; appended to the path after '#'
#   $sec_text - heading text of the section; becomes part of the title
#   $text     - body of the section
#   $title    - title of the source document
sub output {
    my ( $name, $section, $sec_text, $text, $title ) = @_;

    # Element 9 of stat() is the modification time.
    my $mtime = ( stat $name )[9];

    my $html = "<html><head><title>$title : $sec_text</title></head>\n"
             . "<body>$text</body>\n"
             . "</html>\n";

    # Content-Length must describe exactly the document printed below.
    my @headers = (
        "Path-Name: $name#$section",
        'Content-Length: ' . length($html),
        "Last-Mtime: $mtime",
        'Document-Type: HTML*',
    );

    # Headers, then the blank line that separates them from the document.
    print join( "\n", @headers ) . "\n\n";

    print $html;
}
Note: See TracBrowser for help on using the browser.