#!/usr/bin/env perl
# txt2pre --- convert my site's txt files to `pre'-based atom/rss/html
# Copyright (C) 2014-2021 all contributors
# Copyright (c) 2021 bandali
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU Affero General Public License as
# published by the Free Software Foundation, either version 3 of the
# License, or (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU Affero General Public License for more details.
#
# You should have received a copy of the GNU Affero General Public License
# along with this program. If not, see <https://www.gnu.org/licenses/>.
# This simple script borrows from a script of the same name from the
# wonderful public-inbox project, under AGPLv3+, with additions of
# my own.
# Update (2021-11-01): this script isn't currently used for generating
# my site's pages anymore; but kept for future reference.
use strict;
use warnings 'all';
use Getopt::Long;
# Command-line options and their defaults.
#   --format=NAME  output format; the code below checks for html, atom, rss
#   --lang=CODE    language of the note; en and fa are recognised
#   --index, --header, --footer  boolean switches
my ($format, $lang) = ('html', 'en');
my ($index, $header, $footer) = ('', '', '');

GetOptions(
    'format=s' => \$format,
    'lang=s'   => \$lang,
    'index'    => \$index,
    'header'   => \$header,
    'footer'   => \$footer,
) or die("bad command line arguments\n");

# Per-language site metadata.  Every value stays the empty string when
# --lang is neither 'en' nor 'fa'.
my ($author, $site_title, $site_desc, $site_url, $feed_id) = ('') x 5;

if ($lang eq 'en') {
    $author     = 'bandali';
    $site_title = "${author}'s personal site";
    $site_desc  = "notes and blog posts by $author";
    $site_url   = 'https://bndl.org';
    $feed_id    = "tag:bndl.org,2020:notes.$format";
}
elsif ($lang eq 'fa') {
    $author     = 'بندعلی';
    $site_title = "سایت شخصی $author";
    $site_desc  = "نوشتهها و بلاگ پستهای $author";
    $site_url   = 'https://bndl.org/fa/';
    $feed_id    = "tag:bndl.org,2020:fa/notes.$format";
}
# Pattern used to find URLs in text.
#   capture 1: an optional opening delimiter -- one of ( ' ! -- that sits
#              immediately before the URL
#   capture 2: the URL itself: one of the listed schemes, "://", a host
#              part, and an optional path, query string and fragment
# Matching is case-insensitive (/i); whitespace inside the pattern is
# ignored (/x).
my $link_re =
qr{([\('!])?\b((?:ftps?|https?|nntps?|imaps?|s?news|gopher)://
[\@:\w\.-]+(?:/
(?:[a-z0-9\-\._~!\$\&\';\(\)\*\+,;=:@/%]*)
(?:\?[a-z0-9\-\._~!\$\&\';\(\)\*\+,;=:@/%]+)?
(?:\#[a-z0-9\-\._~!\$\&\';\(\)\*\+,;=:@/%\?]+)?
)?
)}xi;
# Keyed by the opening delimiter that $link_re may capture.  Each value
# matches the corresponding closing delimiter, optionally followed by one
# of . , ; + at the very end of a string (\z), and captures that tail.
my %pairs = (
"(" => qr/(\)[\.,;\+]?)\z/, # Markdown (,), Ruby (+) (, for arrays)
"'" => qr/('[\.,;\+]?)\z/, # Perl / Ruby
"!" => qr/(![\.,;\+]?)\z/, # Perl / Ruby
);
# Characters that are special in HTML/XML text and the entity reference
# that replaces each one.  A value must differ from its key: mapping a
# character to itself makes html_esc() a no-op and lets raw markup from
# the input reach the generated pages and feeds.
my %html_map = (
    '&' => '&amp;',
    '<' => '&lt;',
    '>' => '&gt;',
    # Quotes are deliberately left alone.
    # '"' => '&quot;',
    # "'" => '&#39;',
);

# html_esc($s)
#
# Return a copy of $s in which every &, < and > has been replaced by its
# entity from %html_map.  An undefined $s is returned unchanged (undef)
# instead of triggering an "uninitialized value" warning.
sub html_esc {
    my ($s) = @_;
    return $s unless defined $s;
    $s =~ s/([&<>])/$html_map{$1}/g;
    return $s;
}
# linkify($s)
#
# Return a copy of $s in which every URL matched by $link_re is wrapped
# in an HTML anchor: <a href="URL">URL</a>.
#
# The caller in this file passes text that has already been through
# html_esc(), so the URL is inserted into the markup as-is.
#
# Trailing punctuation that most likely belongs to the surrounding
# sentence rather than to the URL is moved outside the anchor:
#   - when the URL was preceded by ( ' or !, the matching closer from
#     %pairs (plus an optional . , ; +) is split off the end;
#   - otherwise a final . , or ; is split off, together with a preceding
#     ')' only if the URL contains no '(' for it to pair with;
#   - otherwise a lone trailing ')' is split off when the URL has no '('.
sub linkify {
    my ($s) = @_;
    $s =~ s^$link_re^
        my $beg = $1 || '';
        my $url = $2;
        my $end = '';

        # it's fairly common to end URLs in messages with
        # '.', ',' or ';' to denote the end of a statement;
        # assume the intent was to end the statement/sentence
        # in English
        if (defined(my $re = $pairs{$beg})) {
            if ($url =~ s/$re//) {
                $end = $1;
            }
        } elsif ($url =~ s/(\))?([\.,;])\z//) {
            $end = $2;
            # require ')' to be paired with '('
            if (defined $1) { # ')'
                if (index($url, '(') < 0) {
                    $end = ")$end";
                } else {
                    $url .= ')';
                }
            }
        } elsif ($url !~ /\(/ && $url =~ s/\)\z//) {
            $end = ')';
        }

        # Emit the anchor itself; returning only the bare URL would
        # leave the text unlinked.
        $beg . qq(<a href="$url">$url</a>) . $end;
    ^geo;
    return $s;
}
# Accumulates the text generated by this script.
my $out = '';
# atom/rss feed header and footer: only handled for --index together with
# --format=atom or --format=rss.  In that mode no note is read; the code
# appends the header (--header) or footer (--footer) to $out and jumps away.
#
# NOTE(review): the string literals in this block look as if their XML
# element markup has been lost (the atom/rss bodies are bare variable
# interpolations, the 'fa' link list and both footers are empty).  In the
# rss branch "$now_rfc53221800" also parses as one variable name that is
# never declared, which `use strict` rejects -- confirm against a pristine
# copy of the script before relying on this code.
if ($index and ($format eq 'atom' or $format eq 'rss')) {
if ($header) {
# Current time in UTC, obtained from date(1); tr strips the trailing
# newline from the command output.
my $now_iso8601 = `date -Iseconds -u | tr -d \\\\n`;
my $now_rfc5322 = `date -uR | tr -d \\\\n`;
# The link pointing at the feed in the format being generated gets
# rel="self"; the link to the other feed format gets rel="alternate".
my $atom_rel = $format eq 'atom' ? 'self' : 'alternate';
my $rss_rel = $format eq 'rss' ? 'self' : 'alternate';
# Element name used for the links: 'link' in atom output, 'atom:link'
# in rss output.
my $link = $format eq 'atom' ? 'link' : 'atom:link';
my $links = '';
if ($lang eq 'en') {
$links = qq(
<$link hreflang="fa" href="https://bndl.org/fa/notes.atom" rel="alternate" type="application/atom+xml" />
<$link hreflang="fa" href="https://bndl.org/fa/notes.rss" rel="alternate" type="application/rss+xml" />
<$link hreflang="fa" href="https://bndl.org/fa/bandali.fa.txt" rel="alternate" type="text/plain" />
<$link hreflang="fa" href="https://bndl.org/fa/" rel="alternate" type="text/html" />
<$link href="https://bndl.org/notes.atom" rel="$atom_rel" type="application/atom+xml" />
<$link href="https://bndl.org/notes.rss" rel="$rss_rel" type="application/rss+xml" />
<$link href="https://bndl.org/bandali.txt" rel="alternate" type="text/plain" />
<$link href="https://bndl.org" rel="alternate" type="text/html" />);
} elsif ($lang eq 'fa') {
$links = qq(
);
}
# Drop the newline that immediately follows the opening "qq(".
$links =~ s/^\n//;
$out .= '';
# Feed header for the selected format, built from the site metadata,
# the link list and the current timestamp.
$out .= ($format eq 'atom') ? qq(
$site_title$site_desc$feed_id
$links
$now_iso8601)
: ($format eq 'rss') ? qq(
$site_title$site_desc
$site_url
$lang$now_rfc5322$now_rfc53221800
$links)
: '';
} elsif ($footer) {
# Feed footer for the selected format.
$out .= ($format eq 'atom') ? ''
: ($format eq 'rss') ? ''
: '';
}
# we're done: skip the per-note processing below.  The PRINT label is
# defined outside this excerpt; presumably it prints $out -- TODO confirm.
goto PRINT;
}
# Slurp the whole note from standard input.  (An empty do-block here would
# leave $txt undefined and nothing would ever be read.)
my $txt = do { local $/; <STDIN> };
$txt = '' unless defined $txt;

# The title is the first line of the note, HTML-escaped and trimmed; the
# author's name is appended unless the title already contains it.
my ($first_line) = $txt =~ /\A([^\n]+)/;
my $title = html_esc(defined $first_line ? $first_line : '');
$title =~ s/^\s+|\s+$//g;
# index() rather than /$author/: with an empty $author (unrecognised
# --lang) the match would turn into Perl's special "empty pattern", which
# silently reuses the last successfully matched regex.
$title .= " — $author" if index($title, $author) < 0;

# The last three lines of the note are read as, in order, the "updated",
# "published" and "plain text" lines; a line that does not carry the
# expected label leaves its variable undefined.
my ($upd, $pub, $url) = $txt =~ /(.*)\r?\n(.*)\r?\n(.*)\r?\n?\z/;
($upd) = $upd =~ /(?:updated|ویرایش): (.*)/ if $upd;
($pub) = $pub =~ /(?:published|انتشار): (.*)/ if $pub;
# A note without an update date counts as last updated when published.
$upd ||= $pub;
($url) = $url =~ /(?:plain text|متن ساده): (.*)/ if $url;
$url = 'https://bndl.org/bandali-cv.txt'
    if (!$url and $title =~ /curriculum vitae/);
$url = html_esc($url) if $url;

$txt = linkify(html_esc($txt));

# Dates are reformatted by date(1).  The variables are declared
# unconditionally and set with a ternary: `my $x = ... if COND;` has
# undefined behaviour according to perlsyn.
# NOTE(review): $upd and $pub are interpolated into a shell command line,
# so the note text must come from a trusted source.
my $upd_iso8601 = $upd ? `date -Iseconds -ud '$upd' | tr -d \\\\n` : undef;
my $pub_iso8601 = $pub ? `date -Iseconds -ud '$pub' | tr -d \\\\n` : undef;
my $pub_rfc5322 = $pub ? `date -uRd '$pub' | tr -d \\\\n` : undef;

# Derive the HTML URL from the plain-text URL: replace "[.LANG].txt" with
# ".html" and drop a "bandali-" prefix from the last path component.
my $url_html = $url ? $url =~ s/(?:[.]$lang)?[.]txt$/.html/r : undef;
$url_html =~ s|/bandali-(.*)|/$1| if $url_html;

# The slug is the file name of the HTML URL without its extension; the
# note id is the feed id followed by ":" and the slug.
my $slug = $url_html ? $url_html =~ s|.*/(.*)[.]html$|$1|r : undef;
my $note_id = $url_html ? "$feed_id:$slug" : undef;
# note header
if ($format eq 'html') {
$out .=
''
. qq(')
. qq(
\n)
. "$title\n"
. qq(\n)
. ($url
? qq(\n)
: '')
. (($index and $lang eq 'en')
? qq(\n)
: ($index and $lang eq 'fa')
? qq(\n)
: '')
. qq(\n"
. '