#!/usr/bin/awk -f
# Disclaimer and Terms: You may use these scripts for commercial or
# non-commercial use at your own risk, as long as you retain the
# copyright statements in the source code. These scripts are provided
# "AS IS" with no warranty whatsoever and are FREE for as long as you
# want to use them. You can edit and adapt them to your requirements
# without seeking permission from me. I only ask that you retain the
# credits where they are due. Additionally, you may not redistribute
# this without express prior permission.
#
# Author: Vishal Goenka
#
# text2html
#
# This AWK script converts a text file to html so that the text can be
# included in an html page. In particular, it converts special HTML
# characters so that they can be displayed as they should. This
# script does not do any syntax highlighting (unlike java2html
# programs), but it does support a few useful options as described in
# the usage below:
#
# Usage: text2html.awk [numeric=1] [nbsp=1] [url=1] [header=1|title=<title>] <file>
#
#  nbsp    ---- Indicates that spaces should be converted to
#               non-breaking spaces (&nbsp;). If nbsp is set, spaces
#               are converted to &nbsp;, tabs are converted to a run
#               of &nbsp; and a line break <br> is added to each line.
#
#  url     ---- Indicates that URLs should be converted to hyperlinks.
#               Following URL protocols are recognized, as defined by
#               RFC 1738.
#               http:// https:// mailto: ftp:// news: telnet:// gopher://
#               nntp:// wais:// prospero://
#               www. has been added to the list in conformance with most
#               e-mail client software behavior.
#
#  header  ---- Indicates that HTML header should be generated for the file.
#               If a title is specified, header is implied. If header is
#               specified and title is not, the filename is used as the
#               title.
#
#  title   ---- Indicates that an HTML header should be generated for the
#               file, using the specified title.
#
#  numeric ---- Indicates that HTML numeric entities should not
#               be converted.
#               By default, HTML numeric entities, represented as
#               &#nnn;, are html-converted, so that they will look
#               like &#nnn; in the HTML representation, rather than
#               the HTML character they represent. For example,
#               '&#65;' in the text file will look like '&#65;' in
#               the result of text2html by default, and as 'A' if
#               numeric is specified.
#
#  tabwidth ---- Number of &nbsp; entities emitted for each tab when
#               nbsp is set (default 8).
#               NOTE(review): the tab expansion string was not
#               recoverable from the damaged copy of this script; 8 is
#               a conventional default -- confirm against the original.

# Register one escape rule.
#   regex     - dynamic regex that recognises the text to replace
#   text      - replacement emitted for a match (unused for the URL rule)
#   lookahead - 1 when the regex ends with a one-character bracket
#               expression that only peeks at the next character; that
#               character is not consumed and is rescanned.
# Maintains the globals pat[], rep[], peek[], nrules and any_pat (the
# alternation of every registered regex).
function add_rule(regex, text, lookahead) {
    pat[++nrules] = regex
    rep[nrules] = text
    peek[nrules] = lookahead
    any_pat = (nrules == 1) ? regex : (any_pat "|" regex)
}

# Build the escape tables from the option variables (numeric, nbsp, url,
# tabwidth).  Produces no output, so it can be called on its own.
# Sets the globals nbsp_on, line_end, url_rule plus those of add_rule().
function configure(    i, tab) {
    nrules = 0
    any_pat = ""
    url_rule = 0

    # "j" is honoured as an alias: the obfuscated script tested a variable
    # named j instead of the documented nbsp option.
    nbsp_on = (nbsp || j) ? 1 : 0
    line_end = nbsp_on ? "<br>" : ""
    if (tabwidth == "")
        tabwidth = 8

    if (nbsp_on) {
        tab = ""
        for (i = 0; i < tabwidth + 0; i++)
            tab = tab "&nbsp;"
        add_rule("\t", tab, 0)
        add_rule(" ", "&nbsp;", 0)
    }

    add_rule(">", "&gt;", 0)
    add_rule("<", "&lt;", 0)
    add_rule("\"", "&quot;", 0)

    if (numeric) {
        # Keep "&#<digit>" (a numeric entity) intact; escape every other "&".
        add_rule("&$", "&amp;", 0)
        add_rule("&[^#]", "&amp;", 1)
        add_rule("&#$", "&amp;#", 0)
        add_rule("&#[^0-9]", "&amp;#", 1)
        add_rule("&#[0-9]", "&#", 1)
    } else {
        add_rule("&", "&amp;", 0)
    }

    if (url) {
        # A URL runs until whitespace or one of < > { } " ` [ ].
        # "]" is placed first in the bracket expression so that it is
        # literal in every awk; \t \r \n replace the former
        # \x009\x010\x013, which gawk >= 4.2 reads as \x00 "9", \x01 "0",
        # \x01 "3" and which therefore cut URLs at the digits 9, 0 and 3.
        add_rule("(http://|https://|www[.]|mailto:|ftp://|news:|telnet://|file://|gopher://|nntp://|wais://|prospero://)[^] <>{}\"`\t\r\n[]+", "", 0)
        url_rule = nrules
    }
}

# Emit the document prologue and build the escape tables.  Runs once, at
# the first record (or in END for empty input), because command-line
# assignments such as url=1 are not yet visible in BEGIN and FILENAME is
# needed for the default title.
function start_document() {
    started = 1
    configure()
    # NOTE(review): the closing tags of this header were stripped from the
    # damaged copy; "</title><body>" is the minimal well-formed completion.
    if (header || title)
        print "<html><title>" (title ? title : FILENAME) "</title><body>"
    if (!nbsp_on)
        print "<pre>"
    # NOTE(review): the original printed a string that is empty in the
    # damaged copy (possibly a stripped tag); kept as an empty line.
    print ""
}

# Return one input line converted to HTML.
#   text - the raw line
# Scans left to right: text up to the next special sequence is copied
# verbatim, the sequence is replaced according to the first rule that
# matches at that position, and scanning resumes after it.  The per-line
# terminator line_end ("<br>" in nbsp mode, otherwise empty) is appended.
function escape_line(text,    out, i, len, hit, link) {
    out = ""
    # Compare against "" explicitly: a bare "while (text)" treats a line
    # such as "0" as false and would drop it from the output.
    while (text != "") {
        if (!match(text, any_pat)) {
            out = out text
            break
        }
        out = out substr(text, 1, RSTART - 1)
        text = substr(text, RSTART)

        # Find the individual rule that matches at position 1.
        for (i = 1; i <= nrules; i++)
            if (match(text, pat[i]) == 1)
                break
        if (i > nrules) {
            # Defensive: should not happen, but never loop forever.
            out = out substr(text, 1, 1)
            text = substr(text, 2)
            continue
        }

        len = RLENGTH
        if (i == url_rule) {
            hit = substr(text, 1, len)
            # The URL class already excludes < > and ", so only "&" still
            # needs escaping ("\\&" yields a literal "&" in gsub).
            gsub(/&/, "\\&amp;", hit)
            # A bare "www." link needs a scheme, or the href is relative.
            link = (hit ~ /^www[.]/) ? ("http://" hit) : hit
            out = out "<a href=\"" link "\">" hit "</a>"
        } else {
            out = out rep[i]
            if (peek[i])
                len--
        }
        text = substr(text, len + 1)
    }
    return out line_end
}

{
    if (!started)
        start_document()
    print escape_line($0)
}

END {
    # Empty input: still emit a balanced document.
    if (!started)
        start_document()
    # NOTE(review): empty in the damaged copy, see start_document().
    print ""
    if (!nbsp_on)
        print "</pre>"
    if (header || title)
        print "</body></html>"
}