static char rcsid[] = "Header: /home/cs/phelps/spine/rman/RCS/rman.c,v 1.144 1999/08/10 00:41:55 phelps Exp phelps $"; /* PolyglotMan by Thomas A. Phelps (phelps@ACM.org) accept man pages as formatted by (10) Hewlett-Packard HP-UX, AT&T System V, SunOS, Sun Solaris, OSF/1, DEC Ultrix, SGI IRIX, Linux, FreeBSD, SCO output as (9) printable ASCII, section headers only, TkMan, [tn]roff, HTML, LaTeX, LaTeX2e, RTF, Perl pod, MIME, and soon SGML written March 24, 1993 bs2tk transformed into RosettaMan November 4-5, 1993 source interpretation added September 24, 1996 renamed PolyglotMan due to lawsuit by Rosetta, Inc. August 8, 1997 */ /* $XFree86: xc/extras/rman/rman.c,v 1.13 2000/12/21 18:55:27 dawes Exp $ */ /* TO DO **** clean up relationship between source and formatted filtering support routines output to SGML with Davenport DTD don't give SHORTLINE if just finished bullet of bultxt, ended section head, ... other cases? make sure text following bullet is aligned correctly output to WinHelp? don't have specs (anybody interested?) collect header and footer until hit blank line? what to do about tables? count second gap of spaces & average gap? ==> good idea but tables too variable for this to work internal, outline-like header section for HTML documents? how to put this *first*? (can't in single pass) one line look ahead to enable better parsing (item lists, et cetera) alluc (==nonlc) flag, copy curline to last line vector (works well with lookahead cache) ?? collect sundry globals into vectors (i.e., arrays and enum indexes) (if compiler has good constant propagation, then shouldn't slow access) collect scattered globals into vectors (e.g., curline[ispcnt]): array + enum curline, lastline, flags, pending, bufs+lens */ #include #include #include #include #include #include /* I'm sure I'll need some #ifdef's here to include the right headers */ /* ... 
but compiles swell on Solaris, DEC Alpha, HP, SunOS */ #include #include #include #ifdef Lynx extern int optind; extern char *optarg; #endif #ifdef __CYGWIN__ #include #endif /*** make #define's into consts? => can't because compilers not smart enough ***/ /* maximum number of tags per line */ #define MAXTAGS 50*100 /* BUFSIZ = 1024 on Solaris */ #define MAXBUF 2*5000 #define MAXLINES 20000 #define MAXTOC 500 #define xputchar(c) (fcharout? putchar(c): (c)) #define sputchar(c) (fcharout? plain[sI++]=(char)c: (char)(c)) #define stagadd(tag) tagadd(tag,sI,0) enum { c_rsquote='\''/*\x27*/, c_lsquote='`'/*\x60*/, c_dagger='\xa7', c_bullet='\xb7', c_plusminus='\xb1' }; /*** tag management ***/ enum tagtype { NOTAG, TITLE, ITALICS, BOLD, SYMBOL, SMALLCAPS, BOLDITALICS, MONO, MANREF }; /* MANREF last */ struct { enum tagtype type; int first; int last; } tags[MAXTAGS]; int tagc=0; struct { char *text; int type; int line; } toc[MAXTOC]; int tocc=0; /* characters in this list automatically prefixed by a backslash (set in output format function */ char *escchars=""; char *vollist = VOLLIST; const char *manvalid = "._-+:"; /* in addition to alphanumerics, valid characters to find in a man page name */ char *manrefname; char *manrefsect; enum command { /*BEGINCHARTAGS,*/ CHARTAB='\t', CHARPERIOD='.', CHARLSQUOTE='`', CHARRSQUOTE='\'', CHARGT='>', CHARLT='<', CHARAMP='&', CHARBACKSLASH='\\', CHARDASH='-', CHARHAT='^', CHARVBAR='|', CHARNBSP=0xa0, CHARCENT=0xa2, CHARSECT=0xa7, CHARCOPYR=0xa9, CHARNOT=0xac, CHARDAGGER=0xad, CHARREGTM=0xae, CHARDEG=0xb0, CHARPLUSMINUS=0xb1, CHARACUTE=0xb4, CHARBULLET=0xb7, CHAR14=0xbc, CHAR12=0xbd, CHAR34=0xbe, CHARMUL=0xd7, CHARDIV=0xf7, CHANGEBAR=0x100, CHARLQUOTE, CHARRQUOTE, HR, /*ENDCHARTAGS,*/ /*BEGINFONTTAGS,*/ BEGINBOLD, ENDBOLD, BEGINITALICS, ENDITALICS, BEGINBOLDITALICS, ENDBOLDITALICS, BEGINSC, ENDSC, BEGINY, ENDY, BEGINCODE, ENDCODE, BEGINMANREF, ENDMANREF, FONTSIZE, /*ENDFONTTAGS*/ /*BEGINLAYOUTTAGS,*/ ITAB, BEGINCENTER, ENDCENTER, 
/*ENDLAYOUTTAGS,*/ /*BEGINSTRUCTTAGS,*/ BEGINDOC, ENDDOC, BEGINCOMMENT, ENDCOMMENT, COMMENTLINE, BEGINBODY, ENDBODY, BEGINHEADER, ENDHEADER, BEGINFOOTER, ENDFOOTER, BEGINLINE, ENDLINE, SHORTLINE, BEGINSECTION, ENDSECTION, BEGINSUBSECTION, ENDSUBSECTION, BEGINSECTHEAD, ENDSECTHEAD, BEGINSUBSECTHEAD, ENDSUBSECTHEAD, BEGINBULPAIR, ENDBULPAIR, BEGINBULLET, ENDBULLET, BEGINBULTXT, ENDBULTXT, BEGINTABLE, ENDTABLE, BEGINTABLELINE, ENDTABLELINE, BEGINTABLEENTRY, ENDTABLEENTRY, BEGININDENT, ENDINDENT, BEGINCODEBLOCK, ENDCODEBLOCK, BEGINDIFFA, ENDDIFFA, BEGINDIFFD, ENDDIFFD /*,*//*ENDSTRUCTTAGS,*/ }; const char *tcltkOP[] = { "Command-Line Name", "Database Name", "Database Class" }; /* characters that need special handling in any output format, *more than just a backslash* */ /* characters in this list need a corresponding case statement in each output format */ /*char *trouble="\t.`'><&\\^|-\xa7\xb7\xb1";*/ const unsigned char trouble[]= { CHARTAB, CHARPERIOD, CHARLSQUOTE, CHARRSQUOTE, CHARGT, CHARLT, CHARAMP, CHARBACKSLASH, CHARDASH, CHARHAT, CHARVBAR, CHARCENT, CHARSECT, CHARCOPYR, CHARNOT, CHARDAGGER, CHARREGTM, CHARDEG, CHARPLUSMINUS, CHARACUTE, CHARBULLET, CHAR14, CHAR12, CHAR34, CHARMUL, CHARDIV, 0 }; enum command tagbeginend[][2] = { /* parallel to enum tagtype */ { -1,-1 }, { -1,-1 }, { BEGINITALICS, ENDITALICS }, { BEGINBOLD, ENDBOLD }, { BEGINY, ENDY }, { BEGINSC, ENDSC }, { BEGINBOLDITALICS, ENDBOLDITALICS }, { -1,-1 }, { BEGINMANREF, ENDMANREF } }; void (*fn)(enum command) = NULL; enum command prevcmd = BEGINDOC; /*** globals ***/ /* move all flags into an array? enum { fSubsX, fLast }; int flags[fLast]; */ int fSource=-1; /* -1 => not determined yet */ int fDiff=0; FILE *difffd; char diffline[MAXBUF]; char diffline2[MAXBUF]; char *message = NULL; int fontdelta=0; int intArg; int fPara=0; /* line or paragraph groupings of text */ int fSubsections=0; /* extract subsection titles too? */ int fChangeleft=0; /* move change bars to left? 
(-1 => delete them) */ int fReflow=0; int fURL=0; /* scan for URLs too? */ #if 0 int fMan=1; /* invoke agressive man page filtering? */ #endif int fQS=0; /* squeeze out spaces (scnt and interword)? */ int fIQS=0; /* squeeze out initial spaces (controlled separately from fQS) */ int fILQS=0; /* squeeze out spaces for usual indent */ int fHeadfoot=0; /* show canonical header and footer at bottom? */ int falluc=0; int itabcnt=0; int fQuiet=0; int fTclTk=0; /* patterns observed in section heads that don't conform to first-letter-uppercase-rest-lowercase pattern (stay all uc, or go all lc, or have subsequent uc) */ int lcexceptionslen = -1; /* computed by system */ char *lcexceptions[] = { /* new rule: double/all consonants == UC? */ /* articles, verbs, conjunctions, prepositions, pronouns */ "a", "an", "the", "am", "are", "is", "were", "and", "or", "by", "for", "from", "in", "into", "it", "of", "on", "to", "with", "that", "this", /* terms */ "API", "CD", "GUI", "UI", /*I/O=>I/O already*/ "ID", "IDs", "OO", "IOCTLS", "IPC", "RPC", /* system names */ "AWK", "cvs", "rcs", "GL", "vi", "PGP", "QuickTime", "DDD", "XPG/3", "NFS", "NIS", "NIS+", "AFS", "UNIX", "SysV", "XFree86", "ICCCM", "MH", "MIME", "TeX", "LaTeX", "PicTeX", "PostScript", "EPS", "EPSF", "EPSI", "HTML", "URL", "WWW", /* institution names */ "ANSI", "CERN", "GNU", "ISO", "NCSA", /* Sun-specific */ "MT-Level", "SPARC", NULL }; /* what exceptions have you encountered? */ int TabStops=8; int hanging=0; /* location of hanging indent (if ==0, none) */ enum { NAME, SYNOPSIS, DESCRIPTION, SEEALSO, FILES, AUTHOR, RANDOM }; /* RANDOM last */ char *sectheadname[] = { "NAME", "SYNOPSIS", "DESCRIPTION:INTRODUCTION", "SEE ALSO:RELATED INFORMATION", "FILES", "AUTHOR:AUTHORS", "RANDOM" }; int sectheadid = RANDOM; int oldsectheadid = RANDOM; int fCodeline=0; int fNOHY=0; /* re-linebreak so no words are hyphenated; not used by TkMan, but gotta keep for people converting formatted text */ int fNORM=0; /* normalize? 
initial space => tabs, no changebars, exactly one blank line between sections */ const char TABLEOFCONTENTS[] = "Table of Contents"; const char HEADERANDFOOTER[] = "Header and Footer"; char manName[80]="man page"; char manSect[10]="1"; const char provenance[] = "manual page source format generated by PolyglotMan v" POLYGLOTMANVERSION; const char anonftp[] = "available via anonymous ftp from ftp.cs.berkeley.edu:/ucb/people/phelps/tcltk/rman.tar.Z"; const char horizontalrule[] = "------------------------------------------------------------"; const int LINEBREAK=70; int linelen=0; /* length of result in plain[] */ int spcsqz; /* number of spaces squeezed out */ int ccnt=0; /* # of changebars */ int scnt,scnt2; /* counts of initial spaces in line */ int s_sum,s_cnt; int bs_sum, bs_cnt; int ncnt=0,oncnt=0; /* count of interline newlines */ int CurLine=1; int AbsLine=1-1; /* absolute line number */ int indent=0; /* global indentation */ int lindent=0; /* usual local indent */ int auxindent=0; /* aux indent */ int I; /* index into line/paragraph */ int fcharout=1; /* show text or not */ char lookahead; /*int tabgram[MAXBUF];*/ /* histogram of first character positions */ char buf[MAXBUF]; char plain[MAXBUF]; /* current text line with control characters stripped out */ char hitxt[MAXBUF]; /* highlighted text (available at time of BEGIN signal */ char header[MAXBUF]/*=""*/; /* complete line */ char header2[MAXBUF]/*=""*/; /* SGIs have two lines of headers and footers */ char header3[MAXBUF]/*=""*/; /* GNU and some others have a third! 
*/ char footer[MAXBUF]/*=""*/; char footer2[MAXBUF]/*=""*/; #define CRUFTS 5 char *cruft[CRUFTS] = { header, header2, header3, footer, footer2 }; char *File, *in; /* File = pointer to full file contents, in = current file pointer */ char *argv0; int finTable=0; char tableSep='\0'; /*\t';*/ /*int fTable=0; int fotable=0;*/ char *tblcellformat; int tblcellspan; /*int tblspanmax;*/ int listtype=-1; /* current list type bogus to begin with */ enum listtypes { DL, OL, UL }; int fIP=0; /*** utility functions ***/ /* case insensitive versions of strcmp and strncmp */ static int stricmp(const char *s1, const char *s2) { assert(s1!=NULL && s2!=NULL); /*strincmp(s1, s2, strlen(s1)+1);*/ while (tolower(*s1)==tolower(*s2)) { if (*s1=='\0' /*&& *s2=='\0'*/) return 0; s1++; s2++; } if (tolower(*s1)0); while (n>0 && tolower(*s1)==tolower(*s2)) { n--; s1++; s2++; } if (n==0) return 0; else if (tolower(*s1)=-1 && end<=255); assert(sen==0 || sen==1); if (*l==':') l++; /* tolerate a leading colon */ /* invariant: c and v point to start of strings to compare */ while (*l) { assert(l==list || l[-1]==':'); for (c=candidate; *c && *l; c++,l++) if ((sen && *c!=*l) || (!sen && tolower(*c)!=tolower(*l))) break; /* if candidate matches a valid one as far as valid goes, it's a keeper */ if ((*l=='\0' || *l==':') && (*c==end || end==-1)) { if (*c=='\b') { c2 = c[-1]; while (*c=='\b' && c[1]==c2) c+=2; } /* no volume qualifiers with digits */ if (!isdigit(*c)) return 1; } /* bump to start of next valid */ while (*l && *l++!=':') /* nada */; } return 0; } static int strcoloncmp(char *candidate, int end, const char *list) { int sen=1; const char *l = list; assert(candidate!=NULL && list!=NULL); assert(end>=-1 && end<=255); if (*l=='=') l++; else end=-1; if (*l=='i') { sen=0; l++; } return strcoloncmp2(candidate, end, l, sen); } /* strdup not universally available */ static char * mystrdup(const char *p) { char *q; if (p==NULL) return NULL; q = malloc(strlen(p)+1); /* +1 gives space for \0 that is 
not reported by strlen */ if (q!=NULL) strcpy(q,p); return q; } /* given line of text, return "casified" version in place: if word in exceptions list, return exception conversion else uc first letter, lc rest */ static void casify(char *p) { char tmpch, *q, **exp; int fuc; for (fuc=1; *p; p++) { if (isspace(*p) || strchr("&/",*p)!=NULL) fuc=1; else if (fuc) { /* usually */ if (p[1] && isupper(p[1]) /*&& p[2] && isupper(p[2])*/) fuc=0; /* check for exceptions */ for (q=p; *q && !isspace(*q); q++) /*nada*/; tmpch = *q; *q='\0'; exp = (char **)bsearch(&p, lcexceptions, lcexceptionslen, sizeof(char *), lcexceptionscmp); *q = tmpch; if (exp!=NULL) { for (q=*exp; *q; q++) *p++=*q; fuc = 1; } } else *p=tolower(*p); } } /* add an attribute tag to a range of characters */ static void tagadd(int /*enum tagtype--abused in source parsing*/ type, int first, int last) { assert(type!=NOTAG); if (tagc0); assert(type==BEGINSECTION || type==BEGINSUBSECTION); if (tocc'"; static char *punct = ".,:;)]}?!"; while (*p==' ') p++; if (strincmp(p,"http",4)==0) { href="%s"; manrefname = p; p+=4; while (*p && !isspace(*p) && !strchr(nonhref,*p)) p++; while (strchr(punct, *(p - 1))) p--; } else { href = manRef; manrefname = p; while (*p && *p!=' ' && *p!='(') p++; *p++='\0'; while (*p==' ' || *p=='(') p++; p0=p; #ifdef XFree86 /* Don't allow a letter after the man section number */ p++; #else while (*p && *p!=')') p++; #endif manrefsect = p0; } *p='\0'; } /* * OUTPUT FORMATS * *** break these out so can selectively include them in the binary *** * *** does this save significant space? 
*** */ static void formattedonly(void) { fprintf(stderr, "The output formats for Tk and TkMan require nroff-formatted input\n"); exit(1); } /* * DefaultFormat -- in weak OO inheritance, top of hierarchy for everybody */ static void DefaultFormat(enum command cmd) { int i; switch (cmd) { case ITAB: for (i=0; i=2 && ((c=='-' || c=='%' || c=='\\' || c=='$' /**/ /* not much talk of money in man pages so reasonable */) && (isalnum(plain[1]) /*<= plain[1]!='-'*//*no dash*/ || ncnt/*GNU long option*/) && plain[1]!=' ') ) clo[clocnt++] = CurLine; /* would like to require second letter to be a capital letter to cut down on number of matches, but command names usually start with lowercase letter maybe use a uppercase requirement as secondary strategy, but probably not */ if ((ncnt || lastsect) && linelen>0 && scnt>0 && scnt<=5) para[paracnt++] = CurLine; lastsect=0; /* rebus too, instead of search through whole Tk widget */ if (rebuspatcnt && scnt>=5 /* not sect or subsect heads */) { for (p=plain; *p && *p!=' '; p++) /*empty*/; /* never first word */ while (*p) { for (i=0; i50) && (abs(scnt-lastscnt)<=1 || abs(scnt-hanging)<=1)) { finflow=1; putchar(' '); } else { Tk(ENDLINE); /*if ((CurLine&0x3f)==0x3f) printf("\"\nupdate idletasks\n$t insert end \""); blows up some Tk text buffer, apparently, on long lines*/ if ((CurLine&0x1f)==0x1f) printf("\"\nupdate idletasks\n$t insert end \""); finflow=0; /*if (fCodeline) printf("CODE");*/ } lastlinelen=linelen; lastscnt=scnt; break; case ENDLINE: /* don't call Tk(ENDLINE) */ break; default: /* if not caught above, it's the same as Tk */ Tk(cmd); } } /* * ASCII */ static void ASCII(enum command cmd) { int i; switch (cmd) { case ENDDOC: if (fHeadfoot) { printf("\n%s\n", HEADERANDFOOTER); for (i=0; i"); break; case BEGINBULLET: printf("\n=item "); break; case ENDBULLET: printf("\n\n"); fcharout=0; break; case BEGINBULTXT: fcharout=1; auxindent=hanging; break; case ENDBULTXT: auxindent=0; break; case ENDDOC: case BEGINBODY: case 
ENDBODY: case BEGINHEADER: case ENDHEADER: case BEGINFOOTER: case ENDFOOTER: case BEGINSECTION: case ENDSECTION: case BEGINSUBSECTION: case ENDSUBSECTION: case BEGINBULPAIR: case ENDBULPAIR: case SHORTLINE: case BEGINTABLE: case ENDTABLE: case BEGINTABLELINE: case ENDTABLELINE: case BEGINTABLEENTRY: case ENDTABLEENTRY: case BEGININDENT: case ENDINDENT: case FONTSIZE: case BEGINBOLDITALICS: case ENDBOLDITALICS: case BEGINY: case ENDY: case BEGINSC: case ENDSC: /* nothing */ break; default: DefaultLine(cmd); } } static void Sections(enum command cmd) { switch (cmd) { case ENDSECTHEAD: case ENDSUBSECTHEAD: putchar('\n'); case BEGINDOC: fcharout=0; break; case BEGINCOMMENT: fcharout=0; break; case ENDCOMMENT: fcharout=1; break; case COMMENTLINE: break; case BEGINSUBSECTHEAD: printf(" "); /* no break */ case BEGINSECTHEAD: fcharout=1; break; case CHARRQUOTE: case CHARLQUOTE: xputchar('"'); break; case CHARLSQUOTE: xputchar('`'); break; case CHARRSQUOTE: case CHARACUTE: xputchar('\''); break; case BEGINTABLE: case ENDTABLE: case BEGINTABLELINE: case ENDTABLELINE: case BEGINTABLEENTRY: case ENDTABLEENTRY: case BEGININDENT: case ENDINDENT: case FONTSIZE: break; case CHARPERIOD: case CHARTAB: case CHARDASH: case CHARBACKSLASH: case CHARLT: case CHARGT: case CHARHAT: case CHARVBAR: case CHARAMP: case CHARNBSP: xputchar(cmd); break; case CHARDAGGER: xputchar('+'); break; case CHARBULLET: xputchar('*'); break; case CHARPLUSMINUS: xputchar('+'); xputchar('-'); break; case CHARCENT: xputchar('c'); break; case CHARSECT: xputchar('S'); break; case CHARCOPYR: xputchar('('); xputchar('C'); xputchar(')'); break; case CHARNOT: xputchar('~'); break; case CHARREGTM: xputchar('('); xputchar('R'); xputchar(')'); break; case CHARDEG: xputchar('o'); break; case CHAR14: xputchar('1'); xputchar('/'); xputchar('4'); break; case CHAR12: xputchar('1'); xputchar('/'); xputchar('2'); break; case CHAR34: xputchar('3'); xputchar('/'); xputchar('4'); break; case CHARMUL: xputchar('X'); break; case 
CHARDIV: xputchar('/'); break; case ITAB: DefaultLine(cmd); break; default: /* nothing */ break; } } static void Roff(enum command cmd) { switch (cmd) { case BEGINDOC: I=1; printf(".TH %s %s \"generated by PolyglotMan\" UCB\n",manName,manSect); printf(".\\\" %s,\n",provenance); printf(".\\\" %s\n",anonftp); CurLine=1; break; case BEGINBODY: printf(".LP\n"); break; case BEGINCOMMENT: case ENDCOMMENT: break; case COMMENTLINE: printf("'\\\" "); break; case BEGINSECTHEAD: printf(".SH "); break; case BEGINSUBSECTHEAD:printf(".SS "); break; case BEGINBULPAIR: printf(".IP "); break; case SHORTLINE: printf("\n.br"); break; case BEGINBOLD: printf("\\fB"); break; /* \n.B -- grr! */ case ENDCODE: case ENDBOLD: printf("\\fR"); break; /* putchar('\n'); */ case BEGINITALICS: printf("\\fI"); break; case ENDITALICS: printf("\\fR"); break; case BEGINCODE: case BEGINBOLDITALICS:printf("\\f4"); break; case ENDBOLDITALICS: printf("\\fR"); break; case CHARLQUOTE: printf("\\*(rq"); break; case CHARRQUOTE: printf("\\*(lq"); break; case CHARNBSP: printf("\\|"); break; case CHARLSQUOTE: putchar('`'); break; case CHARRSQUOTE: putchar('\''); break; case CHARPERIOD: if (I==1) printf("\\&"); putchar('.'); I++; break; case CHARDASH: printf("\\-"); break; case CHARTAB: case CHARLT: case CHARGT: case CHARHAT: case CHARVBAR: case CHARAMP: putchar(cmd); break; case CHARBULLET: printf("\\(bu"); break; case CHARDAGGER: printf("\\(dg"); break; case CHARPLUSMINUS: printf("\\(+-"); break; case CHANGEBAR: putchar('|'); break; case CHARCENT: printf("\\(ct"); break; case CHARSECT: printf("\\(sc"); break; case CHARCOPYR: printf("\\(co"); break; case CHARNOT: printf("\\(no"); break; case CHARREGTM: printf("\\(rg"); break; case CHARDEG: printf("\\(de"); break; case CHARACUTE: printf("\\(aa"); break; case CHAR14: printf("\\(14"); break; case CHAR12: printf("\\(12"); break; case CHAR34: printf("\\(34"); break; case CHARMUL: printf("\\(mu"); break; case CHARDIV: printf("\\(di"); break; case HR: 
/*printf("\n%s\n", horizontalrule);*/ break; case CHARBACKSLASH: printf("\\\\"); break; /* correct? */ case BEGINLINE: /*for (i=0; i\n"); escchars = "{}\\"; break; case ENDDOC: printf("\n"); break; case BEGINCOMMENT: printf("\n\n"); break; case COMMENTLINE: break; case BEGINBODY: printf("{"); break; case ENDBODY: CurLine++; EnsembleDumpTags(); printf("}\n"); tagc=0; break; case BEGINSECTION: printf("
"); break; case ENDSECTION: printf("
\n"); break; case BEGINSECTHEAD: printf("{"); break; case ENDSECTHEAD: tagc=0; I=0; printf("}\n"); break; case BEGINSUBSECTHEAD: printf("{"); break; case ENDSUBSECTHEAD: tagc=0; I=0; printf("}\n"); break; case BEGINBULPAIR: printf(""); break; case ENDBULPAIR: printf("\n"); break; case BEGINBULLET: printf("{"); break; case ENDBULLET: tagc=0; I=0; printf("}"); break; case BEGINBULTXT: printf("{"); break; case ENDBULTXT: EnsembleDumpTags(); CurLine++; printf("}"); break; case BEGINSUBSECTION: printf("\n"); break; case ENDSUBSECTION: printf("\n"); break; case SHORTLINE: /*poppush(prevcmd);*/ break; case CHARRQUOTE: case CHARLQUOTE: putchar('"'); I++; break; case CHARLSQUOTE: putchar('\''); break; case CHARRSQUOTE: putchar('\''); break; case CHARPERIOD: case CHARTAB: case CHARDASH: case CHARBACKSLASH: case CHARLT: case CHARGT: case CHARHAT: case CHARVBAR: case CHARAMP: case CHARBULLET: case CHARDAGGER: case CHARPLUSMINUS: case CHARNBSP: case CHARCENT: case CHARSECT: case CHARCOPYR: case CHARNOT: case CHARREGTM: case CHARDEG: case CHARACUTE: case CHAR14: case CHAR12: case CHAR34: case CHARMUL: case CHARDIV: putchar(cmd); I++; break; case ENDLINE: putchar(' '); I++; break; case HR: /*printf("\n%s\n", horizontalrule);*/ break; case CHANGEBAR: /* maybe something later */ case BEGINLINE: case BEGINY: case ENDY: case BEGINHEADER: case ENDHEADER: case BEGINFOOTER: case ENDFOOTER: case BEGINBOLD: case ENDBOLD: case BEGINCODE: case ENDCODE: case BEGINITALICS: case ENDITALICS: case BEGINBOLDITALICS: case ENDBOLDITALICS: case BEGINSC: case ENDSC: case BEGINTABLE: case ENDTABLE: case BEGINTABLELINE: case ENDTABLELINE: case BEGINTABLEENTRY: case ENDTABLEENTRY: case BEGININDENT: case ENDINDENT: case FONTSIZE: case BEGINMANREF: case ENDMANREF: /* easy strike for hypertext--want to dynamically generate, though */ /* nothing */ break; default: DefaultPara(cmd); } } #endif /* * HTML */ static void HTML(enum command cmd) { static int pre=0; int i; int lasttoc; /* always respond to these 
signals */ switch (cmd) { case CHARNBSP: printf(" "); I++; break; case CHARTAB: printf("    "); break; /* old browsers--who uses these?--don't understand symbolic codes */ /* case CHARNBSP: printf(" "); I++; break; case CHARLQUOTE: printf("«"); break; case CHARRQUOTE: printf("»"); break; case CHARTAB: printf("    "); break; */ case CHARLQUOTE: case CHARRQUOTE: case CHARLSQUOTE: case CHARRSQUOTE: case CHARPERIOD: case CHARDASH: case CHARBACKSLASH: case CHARVBAR: /*printf("¦"); -- broken bar no good */ case CHARHAT: putchar(cmd); break; case CHARDAGGER: printf("*"); break; case CHARBULLET: printf("·"/*"·"*//*§--middot hardly visible*/); break; case CHARPLUSMINUS: printf("±"/*"±"*/); break; case CHARGT: printf(">"); break; case CHARLT: printf("<"); break; case CHARAMP: printf("&"); break; case CHARCENT: printf("¢"); break; /* translate these to symbolic forms, sometime */ case CHARSECT: printf("§"); break; case CHARCOPYR: printf("©"); break; case CHARNOT: printf("¬"); break; case CHARREGTM: printf("®"); break; case CHARDEG: printf("°"); break; case CHARACUTE: printf("´"); break; case CHAR14: printf("¼"); break; case CHAR12: printf("½"); break; case CHAR34: printf("¾"); break; case CHARMUL: printf("×"); break; case CHARDIV: printf("÷"); break; default: break; } /* while in pre mode... */ if (pre) { switch (cmd) { case ENDLINE: I=0; CurLine++; if (!fPara && scnt) printf("
"); printf("\n"); break; case ENDTABLE: if (fSource) { printf("\n"); } else { printf("
\n"); pre=0; fQS=fIQS=fPara=1; } break; case ENDCODEBLOCK: printf(""); pre=0; break; case SHORTLINE: case ENDBODY: printf("\n"); break; case BEGINBOLD: printf(""); break; case ENDBOLD: printf(""); break; case BEGINITALICS: printf(""); break; case ENDITALICS: printf(""); break; default: /* nothing */ break; } return; } /* usual operation */ switch (cmd) { case BEGINDOC: /* escchars = ... => HTML doesn't backslash-quote metacharacters */ printf("\n",provenance); printf("\n\n",anonftp); printf("\n\n"); /* printf("\n");*/ /* better title possible? */ printf(""); printf(manTitle, manName, manSect); printf("\n"); #ifdef XFree86 printf("\n\n"); #else printf("\n\n"); #endif printf("%s

\n", TABLEOFCONTENTS); I=0; break; case ENDDOC: /* header and footer wanted? */ printf("

\n"); if (fHeadfoot) { printf("


%s

\n", HEADERANDFOOTER); for (i=0; i\n",cruft[i]); } if (!tocc) { /*printf("\n

ERROR: Empty man page

\n");*/ } else { printf("\n

\n"); printf("%s

\n", TABLEOFCONTENTS); printf("

    \n"); for (i=0, lasttoc=BEGINSECTION; i\n"); else printf("
\n"); } printf("
  • %s
  • \n", i, i, toc[i].text); } if (lasttoc==BEGINSUBSECTION) printf(""); printf("\n"); } printf("\n"); break; case BEGINBODY: printf("

    \n"); break; case ENDBODY: break; case BEGINCOMMENT: printf("\n\n"); break; case COMMENTLINE: printf(" "); break; case BEGINSECTHEAD: printf("\n

    ", tocc, tocc); break; case ENDSECTHEAD: printf("

    \n"); /* useful extraction from FILES, ENVIRONMENT? */ break; case BEGINSUBSECTHEAD: printf("\n

    ", tocc, tocc); break; case ENDSUBSECTHEAD: printf("

    \n"); break; case BEGINSECTION: break; case ENDSECTION: /* message is plain text, not a format string: emit it verbatim so any '%' in it cannot be interpreted by printf */ if (sectheadid==NAME && message!=NULL) fputs(message, stdout); break; case BEGINSUBSECTION: break; case ENDSUBSECTION: break; case BEGINBULPAIR: if (listtype==OL) printf("\n
      \n"); else if (listtype==UL) printf("\n
        \n"); else printf("\n
        \n"); break; case ENDBULPAIR: if (listtype==OL) printf("\n
    \n"); else if (listtype==UL) printf("\n\n"); else printf("\n"); break; case BEGINBULLET: if (listtype==OL || listtype==UL) fcharout=0; else printf("\n
    "); break; case ENDBULLET: if (listtype==OL || listtype==UL) fcharout=1; else printf("
    "); break; case BEGINBULTXT: if (listtype==OL || listtype==UL) printf("
  • "); else printf("\n
    "); break; case ENDBULTXT: if (listtype==OL || listtype==UL) printf("
  • "); else printf("\n"); break; case BEGINLINE: /* if (ncnt) printf("

    \n"); -- if haven't already generated structural tag */ if (ncnt) printf("

    \n"); /* trailing spaces already trimmed off, so look for eol now */ if (fCodeline) { printf(""); for (i=0; i
    "); fCodeline=0; } I=0; CurLine++; if (!fPara && scnt) printf("
    "); printf("\n"); break; case SHORTLINE: if (fCodeline) { printf("
    "); fCodeline=0; } if (!fIP) printf("
    \n"); break; case BEGINTABLE: if (fSource) { /*printf("

    \n");*/ printf("
    \n"); } else { printf("
    \n"); pre=1; fQS=fIQS=fPara=0;
    		}
    		break;
    	   case ENDTABLE:
    		if (fSource) {
    		  printf("
    \n"); } else { printf("
    \n"); pre=0; fQS=fIQS=fPara=1; } break; case BEGINTABLELINE: printf(""); break; case ENDTABLELINE: printf("\n"); break; case BEGINTABLEENTRY: printf("1) printf(" COLSPAN=%d", tblcellspan); printf(">"); break; case ENDTABLEENTRY: printf(""); break; /* something better with CSS */ case BEGININDENT: printf("
    "); break; case ENDINDENT: printf("
    \n"); break; case FONTSIZE: /* HTML font step sizes are bigger than troff's */ if ((fontdelta+=intArg)!=0) printf("", (intArg>0)?'+':'-'); else printf("\n"); break; case BEGINBOLD: printf(""); break; case ENDBOLD: printf(""); break; case BEGINITALICS: printf(""); break; case ENDITALICS: printf(""); break; case BEGINBOLDITALICS: case BEGINCODE: printf(""); break; case ENDBOLDITALICS: case ENDCODE: printf(""); break; case BEGINCODEBLOCK: printf("
    "); pre=1; break;	/* wrong for two-column lists in kermit.1, pine.1, perl4.1 */
    	   case ENDCODEBLOCK:	printf("
    "); pre=0; break; case BEGINCENTER: printf("
    "); break; case ENDCENTER: printf("
    "); break; case BEGINMANREF: manrefextract(hitxt); if (fmanRef) { printf(""); } else printf(""); break; case ENDMANREF: if (fmanRef) printf("\n"); else printf(""); break; case HR: printf("\n
    \n"); break; /* U (was B, I), strike -- all temporary until HTML 4.0's INS and DEL widespread */ case BEGINDIFFA: printf(""); break; case ENDDIFFA: printf(""); break; case BEGINDIFFD: printf(""); break; case ENDDIFFD: printf(""); break; case BEGINSC: case ENDSC: case BEGINY: case ENDY: case BEGINHEADER: case ENDHEADER: case BEGINFOOTER: case ENDFOOTER: case CHANGEBAR: /* nothing */ break; default: DefaultPara(cmd); } } /* * SGML */ /* same as HTML but just has man page-specific DTD */ /* follows the Davenport DocBook DTD v2.3, availble from ftp.ora.com */ /*char *docbookpath = "docbook.dtd";*/ static void SGML(enum command cmd) { static int pre=0; int i; char *p; static int fRefEntry=0; static int fRefPurpose=0; /*static char *bads => SGML doesn't backslash-quote metacharacters */ /* fprintf(stderr, "The framework for SGML is in place but not done. If you\n" "are familiar with the DocBook DTD, however, it shouldn't be\n" "too difficult to finish it. If you do so, please send your\n" "code to me so that I may share the wealth in the next release.\n" ); exit(1); */ /* always respond to these signals */ switch (cmd) { case CHARLQUOTE: case CHARRQUOTE: printf("""); break; case CHARLSQUOTE: case CHARRSQUOTE: case CHARPERIOD: case CHARTAB: case CHARHAT: case CHARVBAR: case CHARBULLET: case CHARDAGGER: case CHARPLUSMINUS: case CHARNBSP: case CHARCENT: case CHARSECT: case CHARCOPYR: case CHARNOT: case CHARREGTM: case CHARDEG: case CHARACUTE: case CHAR14: case CHAR12: case CHAR34: case CHARMUL: case CHARDIV: putchar(cmd); break; case CHARAMP: printf("&"); break; case CHARDASH: if (sectheadid==NAME && !fRefPurpose) { printf(""); fRefPurpose=1; } else putchar('-'); break; case CHARBACKSLASH: putchar('\\'); break; case CHARGT: printf(">"); break; case CHARLT: printf("<"); break; default: break; } /* while in pre mode... */ if (pre) { switch (cmd) { case ENDLINE: I=0; CurLine++; if (!fPara && scnt) printf("
    "); printf("\n"); break; case ENDTABLE: if (fSource) { printf("\n"); } else { printf("
    \n"); pre=0; fQS=fIQS=fPara=1; } break; default: /* nothing */ break; } return; } /* usual operation */ switch (cmd) { case BEGINDOC: /*printf("\n", docbookpath);*/ printf("\n"); printf("\n\n",anonftp); /* better title possible? */ printf("\n", manName, manSect); printf("%s", manName); printf("%s\n\n", manSect); I=0; break; case ENDDOC: /* header and footer wanted? */ if (fHeadfoot) { printf("%s\n", HEADERANDFOOTER); for (i=0; i%s\n",cruft[i]); printf(""); } /* table of contents, such as found in HTML, can be generated automatically by SGML software */ printf("\n"); break; case BEGINBODY: printf("\n\n"); break; case ENDBODY: break; case BEGINCOMMENT: printf("\n\n"); break; case COMMENTLINE: break; case BEGINSECTHEAD: case BEGINSUBSECTHEAD: printf(""); break; case ENDSECTHEAD: case ENDSUBSECTHEAD: printf(""); break; case BEGINSECTION: if (sectheadid==NAME) printf(""); /*printf(""); -- do lotsa parsing here for RefName, RefPurpose*/ else if (sectheadid==SYNOPSIS) printf(""); else printf(""); break; case ENDSECTION: if (oldsectheadid==NAME) printf("\n\n"); else if (oldsectheadid==SYNOPSIS) printf("\n\n"); else printf("\n\n"); break; case BEGINSUBSECTION: printf(""); break; case ENDSUBSECTION: printf(""); break; /* need to update this for enumerated and plain lists */ case BEGINBULPAIR: printf("\n"); break; case ENDBULPAIR: printf("\n"); break; case BEGINBULLET: printf(""); break; case ENDBULLET: printf(""); break; case BEGINBULTXT: printf(""); break; case ENDBULTXT: printf("\n"); break; case BEGINLINE: /* remember, get BEGINBODY call at start of paragraph */ if (fRefEntry) { if (fRefPurpose) { for (p=plain; *p!='-'; p++) { /* nothing?! */ } } } break; case ENDLINE: /*if (fCodeline) { fIQS=1; fCodeline=0; }*/ if (fCodeline) { printf("
    "); fCodeline=0; } I=0; CurLine++; if (!fPara && scnt) printf("
    "); printf("\n"); break; case SHORTLINE: if (fCodeline) { printf(""); fCodeline=0; } if (!fIP) printf("
    \n"); break; case BEGINTABLE: if (fSource) { printf("\n"); } else { printf("
    \n"); pre=1; fQS=fIQS=fPara=0;
    		}
    		break;
    	   case ENDTABLE:
    		if (fSource) {
    		  printf("
    \n"); } else { printf("
    \n"); pre=0; fQS=fIQS=fPara=1; } break; case BEGINTABLELINE: printf(""); break; case ENDTABLELINE: printf("\n"); break; case BEGINTABLEENTRY: printf(""); break; case ENDTABLEENTRY: printf(""); break; case BEGININDENT: case ENDINDENT: case FONTSIZE: break; /* have to make some guess about bold and italics */ case BEGINBOLD: printf(""); break; case ENDBOLD: printf(""); break; case BEGINITALICS: printf(""); break; case ENDITALICS: printf(""); break; case BEGINBOLDITALICS: case BEGINCODE: printf(""); break; case ENDBOLDITALICS: case ENDCODE: printf(""); break; case BEGINMANREF: /* manrefextract(hitxt); if (fmanRef) { printf("\n"); } else printf(""); break; */ printf(""); break; case ENDMANREF: /* if (fmanRef) printf(""); else printf("");*/ printf(""); break; case HR: /*printf("\n
    \n", horizontalrule);*/ break; case BEGINSC: case ENDSC: case BEGINY: case ENDY: case BEGINHEADER: case ENDHEADER: case BEGINFOOTER: case ENDFOOTER: case CHANGEBAR: /* nothing */ break; default: DefaultPara(cmd); } } #if 0 /* * GNU Texinfo -- somebody should finish this up */ static void Texinfo(enum command cmd) { static int pre=0; int i; /* always respond to these signals */ switch (cmd) { case CHARNBSP: printf(" "); I++; break; case CHARLQUOTE: printf("«"); break; case CHARRQUOTE: printf("»"); break; case CHARTAB: printf("    "); break; /* old browsers--who uses these?--don't understand symbolic codes */ /* case CHARNBSP: printf(" "); I++; break; case CHARLQUOTE: printf("«"); break; case CHARRQUOTE: printf("»"); break; case CHARTAB: printf("    "); break; */ case CHARLSQUOTE: case CHARRSQUOTE: case CHARPERIOD: case CHARDASH: case CHARBACKSLASH: case CHARVBAR: /*printf("¦"); -- broken bar no good */ case CHARHAT: putchar(cmd); break; case CHARDAGGER: printf("*"); break; case CHARBULLET: printf("·"/*"·"*//*§--middot hardly visible*/); break; case CHARPLUSMINUS: printf("±"/*"±"*/); break; case CHARGT: printf(">"); break; case CHARLT: printf("<"); break; case CHARAMP: printf("&"); break; case CHARCENT: printf("¢"); break; /* translate these to symbolic forms, sometime */ case CHARSECT: printf("§"); break; case CHARCOPYR: printf("©"); break; case CHARNOT: printf("¬"); break; case CHARREGTM: printf("®"); break; case CHARDEG: printf("°"); break; case CHARACUTE: printf("´"); break; case CHAR14: printf("¼"); break; case CHAR12: printf("½"); break; case CHAR34: printf("¾"); break; case CHARMUL: printf("×"); break; case CHARDIV: printf("÷"); break; default: break; } /* while in pre mode... */ if (pre) { switch (cmd) { case ENDLINE: I=0; CurLine++; if (!fPara && scnt) printf("
    "); printf("\n"); break; case ENDTABLE: if (fSource) { printf("\n"); } else { printf("
    \n"); pre=0; fQS=fIQS=fPara=1; } break; default: /* nothing */ break; } return; } /* usual operation */ switch (cmd) { case BEGINDOC: /* escchars = ... => HTML doesn't backslash-quote metacharacters */ printf("\n",provenance); printf("\n\n",anonftp); printf("\n\n"); /* printf("\n");*/ /* better title possible? */ printf(""); printf(manTitle, manName, manSect); printf("\n"); printf("\n\n"); printf("%s

    \n", TABLEOFCONTENTS); I=0; break; case ENDDOC: /* header and footer wanted? */ printf("

    \n"); if (fHeadfoot) { printf("


    %s

    \n", HEADERANDFOOTER); for (i=0; i\n",cruft[i]); } if (!tocc) { /*printf("\n

    ERROR: Empty man page

    \n");*/ } else { printf("\n

    \n"); printf("%s

    \n", TABLEOFCONTENTS); printf("

      \n"); for (i=0, lasttoc=BEGINSECTION; i\n"); else printf("
    \n"); } printf("
  • %s
  • \n", i, i, toc[i].text); } if (lasttoc==BEGINSUBSECTION) printf(""); printf("\n"); } printf("\n"); break; case BEGINBODY: printf("

    \n"); break; case ENDBODY: break; case BEGINCOMMENT: case ENDCOMMENT: break; case COMMENTLINE: printf("@c "); break; case BEGINSECTHEAD: break; case ENDSECTHEAD: printf("\n@node %s\n", toc[tocc].text); printf("\n@section %s\n\n", toc[tocc].text); /* useful extraction from FILES, ENVIRONMENT? */ break; case BEGINSUBSECTHEAD: break; case ENDSUBSECTHEAD: printf("\n@node %s\n", toc[tocc].text); printf("\n@subsection %s\n\n", toc[tocc].text); break; case BEGINSECTION: break; case ENDSECTION: break; case BEGINSUBSECTION: break; case ENDSUBSECTION: break; case BEGINBULPAIR: if (listtype==OL) printf("\n

      \n"); else if (listtype==UL) printf("\n
        \n"); else printf("\n
        \n"); break; case ENDBULPAIR: if (listtype==OL) printf("\n
    \n"); else if (listtype==UL) printf("\n\n"); else printf("\n"); break; case BEGINBULLET: if (listtype==OL || listtype==UL) fcharout=0; else printf("\n
    "); break; case ENDBULLET: if (listtype==OL || listtype==UL) fcharout=1; else printf("
    "); break; case BEGINBULTXT: if (listtype==OL || listtype==UL) printf("
  • "); else printf("\n
    "); break; case ENDBULTXT: if (listtype==OL || listtype==UL) printf("
  • "); else printf("\n"); break; case BEGINLINE: /* if (ncnt) printf("

    \n");*/ /* trailing spaces already trimmed off, so look for eol now */ if (fCodeline) { printf(""); for (i=0; i
    "); fCodeline=0; } I=0; CurLine++; if (!fPara && scnt) printf("
    "); printf("\n"); break; case SHORTLINE: if (fCodeline) { printf("
    "); fCodeline=0; } if (!fIP) printf("
    \n"); break; case BEGINTABLE: if (fSource) { /*printf("

    \n");*/ printf("
    \n"); } else { printf("
    \n"); pre=1; fQS=fIQS=fPara=0;
    		}
    		break;
    	   case ENDTABLE:
    		if (fSource) {
    		  printf("
    \n"); } else { printf("
    \n"); pre=0; fQS=fIQS=fPara=1; } break; case BEGINTABLELINE: printf(""); break; case ENDTABLELINE: printf("\n"); break; case BEGINTABLEENTRY: printf("1) printf(" COLSPAN=%d", tblcellspan); printf(">"); break; case ENDTABLEENTRY: printf(""); break; /* something better with CSS */ case BEGININDENT: printf("
    "); break; case ENDINDENT: printf("
    \n"); break; case FONTSIZE: /* HTML font step sizes are bigger than troff's */ if ((fontdelta+=intArg)!=0) printf("", (intArg>0)?'+':'-'); else printf("\n"); break; case BEGINBOLD: printf("@b{"); break; /* } */ case BEGINITALICS: printf("@i{"); break; case BEGINSC: printf("@sc{"); break; /* } */ case ENDITALICS: case ENDBOLD: case ENDSC: /* { */ printf("}"); break; case BEGINBOLDITALICS: case BEGINCODE: printf(""); break; case ENDBOLDITALICS: case ENDCODE: printf(""); break; case BEGINMANREF: manrefextract(hitxt); if (fmanRef) { printf("@xref{}"); } /*"); }*/ else printf(""); break; case ENDMANREF: if (fmanRef) printf("\n"); else printf(""); break; case HR: printf("\n
    \n"); break; case BEGINY: case ENDY: case BEGINHEADER: case ENDHEADER: case BEGINFOOTER: case ENDFOOTER: case CHANGEBAR: /* nothing */ break; default: DefaultPara(cmd); } } #endif /* generates MIME compliant to RFC 1563 */ static void MIME(enum command cmd) { static int pre=0; int i; /* always respond to these signals */ switch (cmd) { case CHARDASH: case CHARAMP: case CHARPERIOD: case CHARTAB: putchar(cmd); break; case CHARLSQUOTE: putchar('`'); break; case CHARACUTE: case CHARRSQUOTE: putchar('\''); break; case CHARBULLET: putchar('*'); break; case CHARDAGGER: putchar('|'); break; case CHARPLUSMINUS: printf("+-"); break; case CHARNBSP: putchar(' '); break; case CHARCENT: putchar('c'); break; case CHARSECT: putchar('S'); break; case CHARCOPYR: printf("(C)"); break; case CHARNOT: putchar('~'); break; case CHARREGTM: printf("(R)"); break; case CHARDEG: putchar('o'); break; case CHAR14: printf("1/4"); break; case CHAR12: printf("1/2"); break; case CHAR34: printf("3/4"); break; case CHARMUL: putchar('X'); break; case CHARDIV: putchar('/'); break; case CHARLQUOTE: case CHARRQUOTE: putchar('"'); break; case CHARBACKSLASH: /* these should be caught as escaped chars */ case CHARGT: case CHARLT: #if 0 assert(1); #endif break; default: break; } /* while in pre mode... */ if (pre) { switch (cmd) { case ENDLINE: I=0; CurLine++; if (!fPara && scnt) printf("\n\n"); break; case ENDTABLE: printf("\n\n"); pre=0; fQS=fIQS=fPara=1; break; default: /* nothing */ break; } return; } /* usual operation */ switch (cmd) { case BEGINDOC: printf("Content-Type: text/enriched\n"); printf("Text-Width: 60\n"); escchars = "<>\\"; I=0; break; case ENDDOC: /* header and footer wanted? */ printf("\n\n"); if (fHeadfoot) { printf("\n"); MIME(BEGINSECTHEAD); printf("%s",HEADERANDFOOTER); MIME(ENDSECTHEAD); for (i=0; i\n"); printf("%s\n%s\n", provenance, anonftp); printf("\n\n"); */ /* printf("\n

    \n"); printf("%s

    \n", TABLEOFCONTENTS); printf("

      \n"); for (i=0, lasttoc=BEGINSECTION; i\n"); else printf("
    \n"); } printf("
  • %s
  • \n", i, i, toc[i].text); } if (lasttoc==BEGINSUBSECTION) printf(""); printf("\n"); printf("\n"); */ break; case BEGINBODY: printf("\n\n"); break; case ENDBODY: break; case BEGINCOMMENT: fcharout=0; break; case ENDCOMMENT: fcharout=1; break; case COMMENTLINE: break; case BEGINSECTHEAD: printf("\n"); /*A NAME=\"sect%d\" HREF=\"#toc%d\">

    ", tocc, tocc);*/ break; case ENDSECTHEAD: printf("\n\n"); /* useful extraction from files, environment? */ break; case BEGINSUBSECTHEAD: printf(""); /*\n

    ", tocc, tocc);*/ break; case ENDSUBSECTHEAD: printf("\n\n"); break; case BEGINSECTION: case BEGINSUBSECTION: break; case ENDSECTION: case ENDSUBSECTION: printf("\n"); break; case BEGINBULPAIR: break; case ENDBULPAIR: break; case BEGINBULLET: printf(""); break; case ENDBULLET: printf("\t"); break; case BEGINBULTXT: case BEGININDENT: printf(""); break; case ENDBULTXT: case ENDINDENT: printf("\n"); break; case FONTSIZE: if ((fontdelta+=intArg)==0) { if (intArg>0) printf(""); else printf(""); } else { if (intArg>0) printf(""); else printf(""); } break; case BEGINLINE: /*if (ncnt) printf("\n\n");*/ break; case ENDLINE: I=0; CurLine++; printf("\n"); break; case SHORTLINE: if (!fIP) printf("\n\n"); break; case BEGINTABLE: printf("\n"); pre=1; fQS=fIQS=fPara=0; break; case ENDTABLE: printf("\n"); pre=0; fQS=fIQS=fPara=1; break; case BEGINTABLELINE: case ENDTABLELINE: case BEGINTABLEENTRY: case ENDTABLEENTRY: break; /* could use a new list type */ case BEGINBOLD: printf(""); break; case ENDBOLD: printf(""); break; case BEGINITALICS: printf(""); break; case ENDITALICS: printf(""); break; case BEGINCODE: case BEGINBOLDITALICS:printf(""); break; case ENDCODE: case ENDBOLDITALICS: printf(""); break; case BEGINMANREF: printf("blue"); /* how to make this hypertext? manrefextract(hitxt); if (fmanRef) { printf("\n"); } else printf(""); break; */ break; case ENDMANREF: printf(""); break; case HR: printf("\n\n%s\n\n", horizontalrule); break; case BEGINSC: case ENDSC: case BEGINY: case ENDY: case BEGINHEADER: case ENDHEADER: case BEGINFOOTER: case ENDFOOTER: case CHANGEBAR: /* nothing */ break; default: DefaultPara(cmd); } } /* * LaTeX */ static void LaTeX(enum command cmd) { switch (cmd) { case BEGINDOC: escchars = "$&%#_{}"; /* and more to come? 
*/ printf("%% %s,\n", provenance); printf("%% %s\n\n", anonftp); /* definitions */ printf( "\\documentstyle{article}\n" "\\def\\thefootnote{\\fnsymbol{footnote}}\n" "\\setlength{\\parindent}{0pt}\n" "\\setlength{\\parskip}{0.5\\baselineskip plus 2pt minus 1pt}\n" "\\begin{document}\n" ); I=0; break; case ENDDOC: /* header and footer wanted? */ printf("\n\\end{document}\n"); break; case BEGINBODY: printf("\n\n"); break; case ENDBODY: break; case BEGINCOMMENT: case ENDCOMMENT: break; case COMMENTLINE: printf("%% "); break; case BEGINSECTION: break; case ENDSECTION: break; case BEGINSECTHEAD: printf("\n\\section{"); tagc=0; break; case ENDSECTHEAD: printf("}"); /* if (CurLine==1) printf("\\footnote{" "\\it conversion to \\LaTeX\ format by PolyglotMan " "available via anonymous ftp from {\\tt ftp.berkeley.edu:/ucb/people/phelps/tcltk}}" ); */ /* useful extraction from files, environment? */ printf("\n"); break; case BEGINSUBSECTHEAD:printf("\n\\subsection{"); break; case ENDSUBSECTHEAD: printf("}"); break; case BEGINSUBSECTION: break; case ENDSUBSECTION: break; case BEGINBULPAIR: printf("\\begin{itemize}\n"); break; case ENDBULPAIR: printf("\\end{itemize}\n"); break; case BEGINBULLET: printf("\\item ["); break; case ENDBULLET: printf("] "); break; case BEGINLINE: /*if (ncnt) printf("\n\n");*/ break; case ENDLINE: I=0; putchar('\n'); CurLine++; break; case BEGINTABLE: printf("\\begin{verbatim}\n"); break; case ENDTABLE: printf("\\end{verbatim}\n"); break; case BEGINTABLELINE: case ENDTABLELINE: case BEGINTABLEENTRY: case ENDTABLEENTRY: break; case BEGININDENT: case ENDINDENT: case FONTSIZE: break; case SHORTLINE: if (!fIP) printf("\n\n"); break; case BEGINBULTXT: break; case ENDBULTXT: putchar('\n'); break; case CHARLQUOTE: printf("``"); break; case CHARRQUOTE: printf("''"); break; case CHARLSQUOTE: case CHARRSQUOTE: case CHARPERIOD: case CHARTAB: case CHARDASH: case CHARNBSP: putchar(cmd); break; case CHARBACKSLASH: printf("$\\backslash$"); break; case CHARGT: 
printf("$>$"); break; case CHARLT: printf("$<$"); break; case CHARHAT: printf("$\\char94{}$"); break; case CHARVBAR: printf("$|$"); break; case CHARAMP: printf("\\&"); break; case CHARBULLET: printf("$\\bullet$ "); break; case CHARDAGGER: printf("\\dag "); break; case CHARPLUSMINUS: printf("\\pm "); break; case CHARCENT: printf("\\hbox{\\rm\\rlap/c}"); break; case CHARSECT: printf("\\S "); break; case CHARCOPYR: printf("\\copyright "); break; case CHARNOT: printf("$\\neg$"); break; case CHARREGTM: printf("(R)"); break; case CHARDEG: printf("$^\\circ$"); break; case CHARACUTE: putchar('\''); break; case CHAR14: printf("$\\frac{1}{4}$"); break; case CHAR12: printf("$\\frac{1}{2}$"); break; case CHAR34: printf("$\\frac{3}{4}$"); break; case CHARMUL: printf("\\times "); break; case CHARDIV: printf("\\div "); break; case BEGINCODE: case BEGINBOLD: printf("{\\bf "); break; /* } */ case BEGINSC: printf("{\\sc "); break; /* } */ case BEGINITALICS: printf("{\\it "); break; /* } */ case BEGINBOLDITALICS:printf("{\\bf\\it "); break; /* } */ case BEGINMANREF: printf("{\\sf "); break; /* } */ case ENDCODE: case ENDBOLD: case ENDSC: case ENDITALICS: case ENDBOLDITALICS: case ENDMANREF: /* { */ putchar('}'); break; case HR: /*printf("\n%s\n", horizontalrule);*/ break; case BEGINY: case ENDY: case BEGINHEADER: case ENDHEADER: case BEGINFOOTER: case ENDFOOTER: case CHANGEBAR: /* nothing */ break; default: DefaultPara(cmd); } } /* LaTeX2e(cmd): LaTeX2e variant of LaTeX(). It handles only BEGINDOC (sets escchars, prints the provenance/anonftp comment lines and a \documentclass{article} preamble, resets I), the font-begin commands (emitted as \textbf, \textsc, \textit, \textsf) and ENDBOLDITALICS, which closes the two groups opened by BEGINBOLDITALICS. Every other command is passed unchanged to LaTeX(cmd), so the single closing brace for the remaining END* font commands comes from there. */ static void LaTeX2e(enum command cmd) { switch (cmd) { /* replace selected commands ... 
*/ case BEGINDOC: escchars = "$&%#_{}"; printf("%% %s,\n", provenance); printf("%% %s\n\n", anonftp); /* definitions */ printf( "\\documentclass{article}\n" "\\def\\thefootnote{\\fnsymbol{footnote}}\n" "\\setlength{\\parindent}{0pt}\n" "\\setlength{\\parskip}{0.5\\baselineskip plus 2pt minus 1pt}\n" "\\begin{document}\n" ); I=0; break; case BEGINCODE: case BEGINBOLD: printf("\\textbf{"); break; /* } */ case BEGINSC: printf("\\textsc{"); break; /* } */ case BEGINITALICS: printf("\\textit{"); break; /* } */ case BEGINBOLDITALICS:printf("\\textbf{\\textit{"); break; /* }} */ case BEGINMANREF: printf("\\textsf{"); break; /* } {{ */ case ENDBOLDITALICS: printf("}}"); break; /* ... rest same as old LaTeX */ default: LaTeX(cmd); } } /* * Rich Text Format (RTF) */ /* RTF(cmd): emit Rich Text Format for one command. BEGINDOC sets escchars to "{}" and prints the \rtf1 header, i.e. the font table followed by the style sheet (Normal, section head, subsection head, detailed list), then resets I. RTF could use more work. */ static void RTF(enum command cmd) { switch (cmd) { case BEGINDOC: escchars = "{}"; /* definitions */ printf( /* fonts */ "{\\rtf1\\deff2 {\\fonttbl" "{\\f20\\froman Times;}{\\f150\\fnil I Times Italic;}" "{\\f151\\fnil B Times Bold;}{\\f152\\fnil BI Times BoldItalic;}" "{\\f22\\fmodern Courier;}{\\f23\\ftech Symbol;}" "{\\f135\\fnil I Courier Oblique;}{\\f136\\fnil B Courier Bold;}{\\f137\\fnil BI Courier BoldOblique;}" "{\\f138\\fnil I Helvetica Oblique;}{\\f139\\fnil B Helvetica Bold;}}" "\n" /* style sheets */ "{\\stylesheet{\\li720\\sa120 \\f20 \\sbasedon222\\snext0 Normal;}" "{\\s2\\sb200\\sa120 \\b\\f3\\fs20 \\sbasedon0\\snext2 section head;}" "{\\s3\\li180\\sa120 \\b\\f20 \\sbasedon0\\snext3 subsection head;}" "{\\s4\\fi-1440\\li2160\\sa240\\tx2160 \\f20 \\sbasedon0\\snext4 detailed list;}}" "\n" /* more header to come--do undefined values default to nice values? */ ); I=0; break; case ENDDOC: /* header and footer wanted? 
*/ printf("\\par{\\f150 %s,\n%s}", provenance, anonftp); printf("}\n"); break; case BEGINBODY: printf("\n\n"); break; case ENDBODY: CurLine++; printf("\\par\n"); tagc=0; break; case BEGINCOMMENT: fcharout=0; break; case ENDCOMMENT: fcharout=1; break; case COMMENTLINE: break; case BEGINSECTION: break; case ENDSECTION: printf("\n\\par\n"); break; case BEGINSECTHEAD: printf("{\\s2 "); tagc=0; break; case ENDSECTHEAD: printf("}\\par"); /* useful extraction from files, environment? */ printf("\n"); break; case BEGINSUBSECTHEAD:printf("{\\s3 "); break; case ENDSUBSECTHEAD: printf("}\\par\n"); break; case BEGINSUBSECTION: break; case ENDSUBSECTION: break; case BEGINLINE: /*if (ncnt) printf("\n\n");*/ break; case ENDLINE: I=0; putchar(' '); /*putchar('\n'); CurLine++;*/ break; case SHORTLINE: if (!fIP) printf("\\line\n"); break; case BEGINBULPAIR: printf("{\\s4 "); break; case ENDBULPAIR: printf("}\\par\n"); break; case BEGINBULLET: break; case ENDBULLET: printf("\\tab "); fcharout=0; break; case BEGINBULTXT: fcharout=1; break; case ENDBULTXT: break; case CHARLQUOTE: printf("``"); break; case CHARRQUOTE: printf("''"); break; case CHARLSQUOTE: case CHARRSQUOTE: case CHARPERIOD: case CHARTAB: case CHARDASH: case CHARBACKSLASH: case CHARGT: case CHARLT: case CHARHAT: case CHARVBAR: case CHARAMP: case CHARNBSP: case CHARCENT: case CHARSECT: case CHARCOPYR: case CHARNOT: case CHARREGTM: case CHARDEG: case CHARACUTE: case CHAR14: case CHAR12: case CHAR34: case CHARMUL: case CHARDIV: putchar(cmd); break; case CHARBULLET: printf("\\bullet "); break; case CHARDAGGER: printf("\\dag "); break; case CHARPLUSMINUS: printf("\\pm "); break; case BEGINCODE: case BEGINBOLD: printf("{\\b "); break; /* } */ case BEGINSC: printf("{\\fs20 "); break; /* } */ case BEGINITALICS: printf("{\\i "); break; /* } */ case BEGINBOLDITALICS:printf("{\\b \\i "); break; /* } */ case BEGINMANREF: printf("{\\f22 "); break; /* } */ case ENDBOLD: case ENDCODE: case ENDSC: case ENDITALICS: case ENDBOLDITALICS: 
case ENDMANREF: /* { */ putchar('}'); break; case HR: printf("\n%s\n", horizontalrule); break; case BEGINY: case ENDY: case BEGINHEADER: case ENDHEADER: case BEGINFOOTER: case ENDFOOTER: case BEGINTABLE: case ENDTABLE: case BEGINTABLELINE: case ENDTABLELINE: case BEGINTABLEENTRY: case ENDTABLEENTRY: case BEGININDENT: case ENDINDENT: case FONTSIZE: case CHANGEBAR: /* nothing */ break; default: DefaultPara(cmd); } } /* * pointers to existing tools: these output formats are not generated here. PostScript(cmd) ignores cmd, prints a hint on stderr naming groff/psroff, and exits with status 1. */ static void PostScript(enum command cmd) { fprintf(stderr, "Use groff or psroff to generate PostScript.\n"); exit(1); } /* FrameMaker(cmd): ignores cmd, reports on stderr that FrameMaker ships its own roff-to-MIF filters, and exits with status 1. */ static void FrameMaker(enum command cmd) { fprintf(stderr, "FrameMaker comes with filters that convert from roff to MIF.\n"); exit(1); } /* * Utilities common to both parses */ /* level 0: DOC - need match level 1: SECTION - need match level 2: SUBSECTION | BODY | BULLETPAIR level 3: BODY (within SUB) | BULLETPAIR (within SUB) | BULTXT (within BULLETPAIR) level 4: BULTXT (within BULLETPAIR within SUBSECTION) never see: SECTHEAD, SUBSECTHEAD, BULLET */ /* one flag per structure, nonzero while that structure is open: Psect section, Psub subsection, Pbp bullet pair, Pbt bullet text, Pb body, Pbul bullet. poppush() sets them, pop() clears them. */ int Psect=0, Psub=0, Pbp=0, Pbt=0, Pb=0, Pbul=0; /* pop(cmd): close open structures, innermost first. For each flag that is set, the matching END* command is sent through fn and the flag is cleared; the function returns as soon as the level that cmd is about to (re)open has been dealt with. ENDDOC matches none of the early returns, so it closes everything that is still open. */ static void pop(enum command cmd) { assert(cmd==ENDINDENT || cmd==BEGINBULLET || cmd==BEGINBULTXT || cmd==BEGINBULPAIR || cmd==BEGINBODY || cmd==BEGINSECTION || cmd==BEGINSUBSECTION || cmd==ENDDOC); /* int i; int p; int match; p=cmdp-1; for (i=cmdp-1;i>=0; i--) if (cmd==cmdstack[i]) { match=i; break; } */ /* if match, pop off all up to and including match */ /* otherwise, pop off one level*/ if (Pbul) { (*fn)(ENDBULLET); Pbul=0; if (cmd==BEGINBULLET) return; } /* else close off ENDBULTXT */ if (Pbt) { (*fn)(ENDBULTXT); Pbt=0; } if (cmd==BEGINBULTXT || cmd==BEGINBULLET) return; if (Pb && cmd==BEGINBULPAIR) { (*fn)(ENDBODY); Pb=0; } /* special: a body that is open when a bullet pair begins is ended before the previous pair is closed */ if (Pbp) { (*fn)(ENDBULPAIR); Pbp=0; } if (cmd==BEGINBULPAIR || cmd==ENDINDENT) return; if (Pb) { (*fn)(ENDBODY); Pb=0; } if (cmd==BEGINBODY) return; if (Psub) { (*fn)(ENDSUBSECTION); Psub=0; } if (cmd==BEGINSUBSECTION) return; if (Psect) { (*fn)(ENDSECTION); Psect=0; 
} if (cmd==BEGINSECTION) return; } /* poppush(cmd): open a new structure. Calls pop(cmd) to close whatever must end first, sets the flag of the structure that cmd opens, forwards cmd to fn and records it in prevcmd. ENDINDENT passes the assert but has no case of its own, so it takes the default branch (warning on stderr unless fQuiet) and is still forwarded to fn. */ static void poppush(enum command cmd) { assert(cmd==ENDINDENT || cmd==BEGINBULLET || cmd==BEGINBULTXT || cmd==BEGINBULPAIR || cmd==BEGINBODY || cmd==BEGINSECTION || cmd==BEGINSUBSECTION); pop(cmd); switch (cmd) { case BEGINBULLET: Pbul=1; break; case BEGINBULTXT: Pbt=1; break; case BEGINBULPAIR: Pbp=1; break; case BEGINBODY: Pb=1; break; case BEGINSECTION: Psect=1; break; case BEGINSUBSECTION: Psub=1; break; default: if (!fQuiet) fprintf(stderr, "poppush: unrecognized code %d\n", cmd); } (*fn)(cmd); prevcmd = cmd; } /* * PREFORMATTED PAGES PARSING */ /* getchartab(): return the current lookahead character and refill lookahead from the input pointer `in` (no refill once lookahead is 0, or while a tab is still being expanded). A tab is returned as a space, and tabexp schedules further spaces computed from TabStops and the column counter charinline, which a backspace decrements. cspccnt counts the spaces returned since the last newline or reset; once it has reached 40 and the next input character is a space, the rest of that run of spaces/tabs in the input is skipped except for its last character and the count starts over. */ static int getchartab(void) { static int tabexp = 0; static int charinline = 0; static int cspccnt = 0; char c; c = lookahead; if (tabexp) tabexp--; else if (c=='\n') { charinline=0; cspccnt=0; } else if (c=='\t') { tabexp = TabStops-(charinline%TabStops); if (tabexp==TabStops) tabexp=0; lookahead = c = ' '; } else if (cspccnt>=40) { if (*in==' ') { while (*in==' '||*in=='\t') in++; in--; } cspccnt=0; } if (!tabexp && lookahead) lookahead = *in++; if (c=='\b') charinline--; else charinline++; if (c==' ') cspccnt++; return c; } /* replace gets. handles hyphenation too */ static char * la_gets(char *buf) { static char la_buf[MAXBUF]; /* can lookahead a full line, but nobody does now */ static int fla=0, hy=0; char *ret,*p; int c,i; assert(buf!=NULL); if (fla) { /* could avoid copying if callers used return value */ strcpy(buf,la_buf); fla=0; ret=buf; /* correct? */ } else { /*ret=gets(buf); -- gets is deprecated (since it can read too much?) */ /* could do this... ret=fgets(buf, MAXBUF, stdin); buf[strlen(buf)-1]='\0'; ... but don't want to have to rescan line with strlen, so... 
*/ i=0; p=buf; /* recover spaces if re-linebreaking */ for (; hy; hy--) { *p++=' '; i++; } while (lookahead && (c=getchartab())!='\n' && ibuf && p[-1]=='-' && isspace(lookahead)) { p--; /* zap hyphen */ /* zap boldfaced hyphens, gr! */ while (p[-1]=='\b' && p[-2]=='-') p-=2; /* start getting next line, spaces first ... */ while (lookahead && isspace(lookahead) && lookahead!='\n') { getchartab(); hy++; } /* ... append next nonspace string to previous ... */ while (lookahead && !isspace(lookahead) && i++=3 spaces) */ int phraselen; static void filterline(char *buf, char *plain) { char *p,*q,*r; char *ph; int iq; int i,j; int hl=-1, hl2=-1; int iscnt=0; /* interword space count */ int tagci; int I0; int etype; int efirst; enum tagtype tag = NOTAG; assert(buf!=NULL && plain!=NULL); etype=NOTAG; efirst=-1; tagci=tagc; ph=phrase; phraselen=0; scnt=scnt2=0; s_sum=s_cnt=0; bs_sum=bs_cnt=0; ccnt=0; spcsqz=0; /* strip only certain \x1b's and only at very beginning of line */ for (p=buf; *p=='\x1b' && (p[1]=='8'||p[1]=='9'); p+=2) /* nop */; strcpy(plain,p); q=&plain[strlen(p)]; /*** spaces and change bars ***/ for (scnt=0,p=plain; *p==' '; p++) scnt++; /* initial space count */ if (scnt>200) scnt=130-(q-p); assert(*q=='\0'); q--; if (fChangeleft) for (; q-40>plain && *q=='|'; q--) { /* change bars */ if (fChangeleft!=-1) ccnt++; while (q-2>=plain && q[-1]=='\b' && q[-2]=='|') q-=2; /* boldface changebars! */ } /*if (q!=&plain[scnt-1])*/ /* zap trailing spaces */ for (; *q==' ' && q>plain; q--) /* nop */; /* second changebar way out east! 
HACK HACK HACK */ if (q-plain>100 && *q=='|') { while (*q=='|' && q>plain) { q--; if (fChangeleft!=-1) ccnt++; } while ((*q==' ' || *q=='_' || *q=='-') && q>plain) q--; } for (r=q; (*r&0xff)==CHARDAGGER; r--) *r='-'; /* convert daggers at end of line to hyphens */ if (q-plain < scnt) scnt = q-plain+1; q[1]='\0'; /* set I for tags below */ if (indent>=0 && scnt>=indent) scnt-=indent; if (!fPara && !fIQS) { if (fChangeleft) I+=(scnt>ccnt)?scnt:ccnt; else I+=scnt; } I0=I; /*** tags and filler spaces ***/ iq=0; falluc=1; for (q=plain; *p; p++) { iscnt=0; if (*p==' ') { for (r=p; *r==' '; r++) { iscnt++; spcsqz++; } s_sum+=iscnt; s_cnt++; if (iscnt>1 && !scnt2 && *p==' ') scnt2=iscnt; if (iscnt>2) { bs_cnt++; bs_sum+=iscnt; } /* keep track of large gaps */ iscnt--; /* leave last space for tail portion of loop */ /* write out spaces */ if (fQS && iscnt<3) { p=r-1; iscnt=0; } /* reduce strings of <3 spaces to 1 */ /* else if (fQS && iscnt>=3) { replace with tab? } */ else { for (i=0; iplain && q[-1]=='+') { /* bold plus/minus(!) */ q[-1]=c_plusminus; while (*p=='\b' && p[1]=='_') p+=2; continue; } else if ((*p=='_' && p[1]=='\b' && p[2]!='_' && p[3]!='\b') || (*p=='\b' && p[1]=='_')) { /* italics */ if (tag!=ITALICS && hl>=0) { tagadd(tag, hl, I+iq); hl=-1; } if (hl==-1) hl=I+iq; tag=ITALICS; p+=2; } else if (*p=='_' && p[2]==p[4] && p[1]=='\b' && p[3]=='\b' && p[2]!='_') { /* bold italics (for Solaris) */ for (p+=2; *p==p[2] && p[1]=='\b';) p+=2; if (tag!=BOLDITALICS && hl>=0) { tagadd(tag, hl, I+iq); hl=-1; } if (hl==-1) hl=I+iq; tag=BOLDITALICS; } else if (*p==p[2] && p[1]=='\b') { /* boldface */ while (*p==p[2] && p[1]=='\b') p+=2; if (tag!=BOLD && hl>=0) { tagadd(tag, hl, I+iq); hl=-1; } if (hl==-1) hl=I+iq; tag=BOLD; } else if (p[1]=='\b' && ((*p=='o' && p[2]=='+') || (*p=='+' && p[2]=='o')) ) { /* bullets */ p+=2; while (p[1]=='\b' && (*p=='o' || p[2]=='+') ) p+=2; /* bold bullets(!) 
*/ *q++=c_bullet; iq++; continue; } else if (*p=='\b' && p>plain && p[-1]=='o' && p[1]=='+') { /* OSF bullets */ while (*p=='\b' && p[1]=='+') p+=2; /* bold bullets(!) */ q[-1]=c_bullet; p--; continue; } else if (p[1]=='\b' && *p=='+' && p[2]=='_') { /* plus/minus */ p+=2; *q++=c_plusminus; iq++; continue; } else if (p[1]=='\b' && *p=='|' && p[2]=='-') { /* dagger */ *q++=c_dagger; iq++; p+=2; continue; } else if (*p=='\b') { /* supress unattended backspaces */ continue; } else if (*p=='\x1b' /*&& (p[1]=='9'||p[1]=='8')*/) { p++; if (*p=='[') { p++; if (*p=='1' && hl==-1) { /* stash attributes in "invalid" array element */ efirst=I+iq; etype=BOLD; /*hl=I+iq; tag=BOLD; -- faces immediate end of range */ } else if (*p=='0' /*&& hl>=0 && hl2==-1 && tags[MAXTAGS].first=0 || isupper(p[1]) || (p[1]=='_' && p[2]!='\b') || p[1]=='&')) { if (hl==-1 && efirst==-1) { hl=I+iq; tag=SMALLCAPS; } } else { /* end of tag, one way or another */ /* collect tags in this pass, interspersed later if need be */ /* can't handle overlapping tags */ if (hl>=0) { if (hl2==-1) tagadd(tag, hl, I+iq); hl=-1; } } /** non-backspace related filtering **/ /* case statement here in place of if chain? */ /* Tk 3.x's text widget tabs too crazy if (*p==' ' && strncmp(" ",p,5)==0) { xputchar('\t'); i+=5-1; ci++; continue; } else */ /* copyright symbol: too much work for so little if (p[i]=='o' && (strncmp("opyright (C) 19",&p[i],15)==0 || strncmp("opyright (c) 19",&p[i],15)==0)) { printf("opyright \xd3 19"); tagadd(SYMBOL, ci+9, ci+10); i+=15-1; ci+=13; continue; } else */ if (*p=='(' && q>plain && (isalnum(q[-1])||strchr(manvalid/*"._-+"*/,q[-1])!=NULL) && strcoloncmp(&p[1],')',vollist) /* && p[1]!='s' && p[-1]!='`' && p[-1]!='\'' && p[-1]!='"'*/ ) { hl2=I+iq; for (r=q-1; r>=plain && (isalnum(*r)||strchr(manvalid/*"._-+:"*/,*r)!=NULL); r--) hl2--; /* else ref to a function? 
*/ /* maybe save position of opening paren so don't highlight it later */ } else if (*p==')' && hl2!=-1) { /* don't overlap tags on man page referenes */ while (tagc>0 && tags[tagc-1].last>hl2) tagc--; tagadd(MANREF, hl2, I+iq+1); hl2=hl=-1; } else if (hl2!=-1) { /* section names are alphanumic or '+' for C++ */ if (!isalnum(*p) && *p!='+') hl2=-1; } /*assert(*p!='\0');*/ if (!*p) break; /* not just safety check -- check out sgmls.1 */ *q++=*p; /* falluc = falluc && (isupper(*p) || isspace(*p) || isdigit(*p) || strchr("-+&_'/()?!.,;",*p)!=NULL);*/ falluc = falluc && !islower(*p); if (!scnt2) { *ph++=*p; phraselen++; } iq+=iscnt+1; } if (hl>=0) tagadd(tag, hl, I+iq); else if (efirst>=0) tagadd(etype, efirst, I+iq); *q=*ph='\0'; linelen=iq+ccnt; /* special case for Solaris: if line has ONLY tags AND they SPAN line, convert to one tag */ fCodeline=0; if (tagc && tags[0].first==0 && tags[tagc-1].last==linelen) { fCodeline=1; j=0; /* invariant: at start of a tag */ for (i=0; fCodeline && iNOTAG && tags[i].type<=MANREF); assert(tags[i].first>=I0 && tags[i].last<=linelen+I0); assert(tags[i].first<=tags[i].last); /* verify for no overlap with other tags */ for (j=i+1; j=tags[j].last*/); } } } /* buf[] == input text (read only) plain[] == output (initial, trailing spaces stripped; tabs=>spaces; underlines, overstrikes => tag array; spaces squeezed, if requested) ccnt = count of changebars scnt = count of initial spaces linelen = length result in plain[] */ /*#define MAXINDENT 15*/ /*#define HEADFOOTMATCH 20*/ int fHead=0; int fFoot=0; static void preformatted_filter(void) { const int MINRM=50; /* minimum column for right margin */ const int MINMID=20; const int HEADFOOTSKIP=20; const int HEADFOOTMAX=25; int curtag; char *p,*r; char head[MAXBUF]=""; /* first "word" */ char foot[MAXBUF]=""; int header_m=0, footer_m=0; int headlen=0, footlen=0; /* int line=1-1; */ int i,j,k,l,off; int sect=0,subsect=0,bulpair=0,osubsect=0; int title=1; int oscnt=-1; int empty=0,oempty; int 
fcont=0; int Pnew=0,I0; float s_avg=0.0; int spaceout; int skiplines=0; int c; /* try to keep tabeginend[][] in parallel with enum tagtype */ assert(tagbeginend[ITALICS][0]==BEGINITALICS); assert(tagbeginend[MANREF][1]==ENDMANREF); in++; /* lookahead = current character, in points to following */ /* for (i=0; i=2 && bs_cnt<=5 && ((float) bs_sum / (float) bs_cnt)>3.0)); if (finTable) { if (!fotable) (*fn)(BEGINTABLE); } else if (fotable) { (*fn)(ENDTABLE); I=I0; tagc=0; filterline(buf,plain); /* rescan first line out of table */ } #endif s_avg=(float) s_sum; if (s_cnt>=2) { /* don't count large second space gap */ if (scnt2) s_avg= (float) (s_sum - scnt2) / (float) (s_cnt-1); else s_avg= (float) (s_sum) / (float) (s_cnt); } p=plain; /* points to current character in plain */ /*** determine header and global indentation ***/ if (/*fMan && (*/!fHead || indent==-1/*)*/) { if (!linelen) continue; if (!*header) { /* check for missing first header--but this doesn't catch subsequent pages */ if (stricmp(p,"NAME")==0 || stricmp(p,"NOMBRE")==0) { /* works because line already filtered */ indent=scnt; /*filterline(buf,plain);*/ scnt=0; I=I0; fHead=1; } else { fHead=1; (*fn)(BEGINHEADER); /* grab header and its first word */ strcpy(header,p); if ((header_m=HEADFOOTSKIP)>linelen) header_m=0; strcpy(head,phrase); headlen=phraselen; la_gets(buf); filterline(buf,plain); if (linelen) { strcpy(header2,plain); if (strincmp(plain,"Digital",7)==0 || strincmp(plain,"OSF",3)==0) { fFoot=1; fSubsections=0; } } (*fn)(ENDHEADER); tagc=0; continue; } } else { /* some idiot pages have a *third* header line, possibly after a null line */ if (*header && scnt>MINMID) { strcpy(header3,p); ncnt=0; continue; } /* indent of first line ("NAME") after header sets global indent */ /* check '<' for Plan 9(?) 
*/ if (*p!='<') { indent=scnt; I=I0; scnt=0; } else continue; } /* if (indent==-1) continue;*/ } if (!lindent && scnt) lindent=scnt; /*printf("lindent = %d, scnt=%d\n",lindent,scnt);*/ /**** for each ordinary line... *****/ /*** skip over global indentation */ oempty=empty; empty=(linelen==0); if (empty) {ncnt++; continue;} /*** strip out per-page titles ***/ if (/*fMan && (*/scnt==0 || scnt>MINMID/*)*/) { /*printf("***ncnt = %d, fFoot = %d, line = %d***", ncnt,fFoot,AbsLine);*/ if (!fFoot && !isspace(*p) && (scnt>5 || (*p!='-' && *p!='_')) && /* don't add ncnt -- AbsLine gets absolute line number */ (((ncnt>=2 && AbsLine/*+ncnt*/>=61/*was 58*/ && AbsLine/*+ncnt*/<70) || (ncnt>=4 && AbsLine/*+ncnt*/>=59 && AbsLine/*+ncnt*/<74) || (ncnt && AbsLine/*+ncnt*/>=61 && AbsLine/*+ncnt*/<=66)) && (/*lookahead!=' ' ||*/ (s_cnt>=1 && s_avg>1.1) || !falluc) ) ) { (*fn)(BEGINFOOTER); /* grab footer and its first word */ strcpy(footer,p); /* if ((footer_m=linelen-HEADFOOTSKIP)<0) footer_m=0;*/ if ((footer_m=HEADFOOTSKIP)>linelen) footer_m=0; /*grabphrase(p);*/ strcpy(foot,phrase); footlen=phraselen; /* permit variations at end, as for SGI "Page N", but keep minimum length */ if (footlen>3) footlen--; la_gets(buf); filterline(buf,plain); if (linelen) strcpy(footer2,plain); title=1; (*fn)(ENDFOOTER); tagc=0; /* if no header on first page, try again after first footer */ if (!fFoot && *header=='\0') fHead=0; /* this is dangerous */ fFoot=1; continue; } else /* a lot of work, but only for a few lines (about 4%) */ if (fFoot && (scnt==0 || scnt+indent>MINMID) && ( (headlen && strncmp(head,p,headlen)==0) || strcmp(header2,p)==0 || strcmp(header3,p)==0 || (footlen && strncmp(foot,p,footlen)==0) || strcmp(footer2,p)==0 /* try to recognize lines with dates and page numbers */ /* skip into line */ || (header_m && header_mnew paragraph, line mode=>blank lines */ /* need to chop up lines for Roff */ /*tabgram[scnt]++;*/ if (title) ncnt=(scnt!=oscnt || (/*scnt<4 &&*/ isupper(*p))); itabcnt = 
scnt/5; if (CurLine==1) {ncnt=0; tagc=0;} /* gobble all newlines before first text line */ sect = (scnt==0 && isupper(*p)); subsect = (fSubsections && (scnt==2||scnt==3)); if ((sect || subsect) && ncnt>1) ncnt=1; /* single blank line between sections */ (*fn)(BEGINLINE); if (/*fPara &&*/ ncnt) Pnew=1; title=0; /*ncnt=0;--moved down*/ /*if (finTable) (*fn)(BEGINTABLELINE);*/ oscnt=scnt; /*fotable=finTable;*/ /* let output modules decide what to do at the start of a paragraph if (fPara && !Pnew && (prevcmd==BEGINBODY || prevcmd==BEGINBULTXT)) { putchar(' '); I++; } */ /*** identify structural sections and notify fn */ /*if (fMan) {*/ /* bulpair = (scnt<7 && (*p==c_bullet || *p=='-'));*/ /* decode the below */ bulpair = ((!auxindent || scnt!=lindent+auxindent) /*!bulpair*/ && ((scnt>=2 && scnt2>5) || scnt>=5 || (tagc>0 && tags[0].first==scnt) ) /* scnt>=2?? */ && (((*p==c_bullet || strchr("-+.",*p)!=NULL || falluc) && (ncnt || scnt2>4)) || (scnt2-s_avg>=2 && phrase[phraselen-1]!='.') || (scnt2>3 && s_cnt==1) )); if (bulpair) { if (tagc>0 && tags[0].first==scnt) { k=tags[0].last; for (l=1; l=5 && kccnt)?(scnt-ccnt):0; if (fILQS) { if (spaceout>=lindent) spaceout-=lindent; else spaceout=0; } if (auxindent) { if (spaceout>=auxindent) spaceout-=auxindent; else spaceout=0; } if (fNORM) { if (itabcnt>0) (*fn)(ITAB); for (i=0; i<(scnt%5); i++) putchar(' '); } else printf("%*s",spaceout,""); } /*** iterate over each character in line, ***/ /*** handling underlining, tabbing, copyrights ***/ off=(!fIQS&&!fPara)?scnt:0; for (i=0, p=plain, curtag=0, fcont=0; *p; p++,i++,fcont=0) { /* interspersed presentation signals */ /* start tags in reverse order of addition (so structural first) */ if (curtag \-opt */ if (p==plain || (isspace(p[-1]) && !isspace(p[1]))) { (*fn)(CHARDASH); fcont=1; } break; } /* troublemaker characters */ c = (*p)&0xff; if (!fcont && fcharout) { if (strchr(escchars,c)!=NULL) { putchar('\\'); putchar(c); I++; } else if (strchr((char *)trouble,c)!=NULL) { 
(*fn)(c); fcont=1; } else { putchar(c); I++; } } /*default:*/ if (curtag */ falluc = falluc && !islower(*in); *p++ = *in++; } if (*in) in++; *p='\0'; /* normalize commands */ p=tmpbuf; q=buf; /* copy from tmpbuf to buf */ /* no spaces between command-initiating period and command letters */ if (*p=='\'') { *p='.'; } /* what's the difference? */ if (*p=='.') { *q++ = *p++; while (isspace(*p)) p++; } /* convert lines with tabs to tables? */ fsourceTab=0; /* if comment at start of line, OK */ /* dynamically determine iff Tcl/Tk page by scanning comments */ begin = p; if (*p=='\\' && *(p+1)=='"') { if (!fTclTk && strstr(p+1,"supplemental macros used in Tcl/Tk")!=NULL) fTclTk=1; if (fTclTk) p+=2; } while (*p) { if (*p=='\t') fsourceTab++; if (*p=='\\') { p++; if (*p=='n') { p++; if (*p=='(') { p++; name[0]=*p++; name[1]=*p++; name[2]='\0'; } else { name[0]=*p++; name[1]='\0'; } *q='0'; *(q+1)='\0'; /* defaults to 0, in case doesn't exist */ for (i=0; ibuf && isspace(*q)) q--; /* trim tailing whitespace */ q++; *q='\0'; } else { /* verbatim character (often a backslash) */ *q++ = '\\'; /* postpone interpretation (not the right thing but...) 
*/ *q++ = *p++; } } else *q++ = *p++; } /* dumb Digital--later */ /*if (q-3>plain && q[-1]=='{' && q[-2]=='\\' && q[-3]==' ') q[-3]='\n'; }*/ /* close off buf */ *q='\0'; #if 0 if (q>buf && q[-1]=='\\' && *in=='.') /* append next line * / else break;*/ #endif break; } /*printf("*ret = |%s|\n", ret!=NULL?ret : "NULL");*/ return ret; } /* dump characters from buffer, signalling right tags along the way */ /* all this work to introduce an internal second pass to recognize man page references */ /* now for HTTP references too */ int sI=0; /* use int linelen from up top */ int fFlush=1; static void source_flush(void) { int i,j; char *p,*q,*r; int c; int manoff,posn; if (!sI) return; plain[sI] = '\0'; /* flush called often enough that all man page references are at end of text to be flushed */ /* find man page ref */ if (sI>=4/*+1*/ && (plain[sI-(manoff=1)-1]==')' || plain[sI-(manoff=0)-1]==')')) { for (q=&plain[sI-manoff-1-1]; q>plain && isalnum(*q) && *q!='('; q--) /* nada */; if (*q=='(' && strcoloncmp(&q[1],')',vollist)) { r=q-1; if (*r==' ' && (sectheadid==SEEALSO || /*single letter volume */ *(q+2)==')' || *(q+3)==')')) r--; /* permitted single intervening space */ for ( ; r>=plain && (isalnum(*r) || strchr(manvalid,*r)!=NULL); r--) /* nada */; r++; if (isalpha(*r) && r= posn) tagc--;*/ /* add MANREF tags */ strcpy(hitxt,r); tagadd(BEGINMANREF, posn, 0); tagadd(ENDMANREF, sI-manoff-1+1, 0); } } /* HTML hyperlinks */ } else if (fURL && sI>=4 && (p=strstr(plain,"http"))!=NULL) { i = p-plain; tagadd(BEGINMANREF, i, 0); for (j=0; i=LINEBREAK && c==' ') { (*fn)(ENDLINE); linelen=0; } else { /* normal character */ xputchar(c); if (fcharout) linelen++; } /*if (linelen>=LINEBREAK && c==' ') { (*fn)(ENDLINE); linelen=0; } -- leaves space at end of line*/ } /* dump tags at end */ /*for ( ; j program code */ styles[++style] = BOLDITALICS; stagadd(BEGINBOLDITALICS); break; case '1': case '0': case 'R': case 'P': /* back to Roman */ /*sputchar(' '); -- taken out; not needed, I 
hope */ funwind=1; break; case '-': p++; break; } break; case '(': /* multicharacter macros */ p++; for (i=0; (unsigned)i can't because next line might start with a command */ supresseol=1; p++; break; case '-': /* minus sign */ sputchar(CHARDASH); p++; break; #if 0 case '^': /* end stylings? (found in Solaris) */ p++; #endif case 'N': p++; if (*p == '\'') { char *tmp; p++; if ((tmp = strchr(p, '\'')) != NULL) { sputchar(atoi(p)); p = tmp + 1; } } break; default: /* unknown escaped character */ sputchar(*p++); } } else { /* normal character */ if (*p) sputchar(*p++); } /* unwind character formatting stack */ if (funwind) { for ( ; style>=0; style--) { if (styles[style]==BOLD) { stagadd(ENDBOLD); } else if (styles[style]==ITALICS) { stagadd(ENDITALICS); } else { stagadd(ENDBOLDITALICS); } } /* else error */ assert(style==-1); funwind=0; } /* check for man page reference and flush buffer if safe */ /* postpone check until after following character so catch closing tags */ if ((sI>=4+1 && plain[sI-1-1]==')') || /* (plain[sI-1]==' ' && (q=strchr(plain,' '))!=NULL && q<&plain[sI-1])) */ (plain[sI-1]==' ' && !isalnum(plain[sI-1-1]))) { /* regardless, flush buffer */ source_flush(); } } if (*p && *p!=' ') p++; /* skip over end character */ free(pstart); return (char *)pin + (p - pstart); }

/* oh, for function overloading. inlined by compiler, probably */
/*
 * source_out: shorthand for source_out0() with a NUL passed as the end
 * character.  The value returned by source_out0() is handed back unchanged.
 */
static char * source_out(const char *p) {
	return source_out0(p,'\0');
}

/*
 * source_out_word: output one argument word starting at p.
 *   p -- text to read from; leading whitespace is skipped first.
 * The end character given to source_out0() is a space, unless the word opens
 * with a double quote, in which case the quote is stepped over and becomes
 * the end character instead.
 * Returns the pointer that source_out0() returns (cast to plain char *).
 * NOTE(review): isspace() is called on a plain char here; confirm the input
 * cannot hold bytes above 0x7f on platforms where char is signed.
 */
static char * source_out_word(const char *p) {
	char end = ' ';
	while (*p && isspace(*p)) p++;
	if (*p=='"' /* || *p=='`' ? */) { end = *p; p++; }
	p = source_out0(p,end);
	/*while (*p && isspace(*p)) p++;*/
	return (char *)p;
}

/*
 * source_struct: signal a structural command to the output function.
 *   cmd -- the command passed on to fn.
 * Steps, in order: push the roff escapes \fR\s0 through source_out(), call
 * source_flush(), zero linelen when cmd is SHORTLINE, then call (*fn)(cmd).
 */
static void source_struct(enum command cmd) {
	source_out("\\fR\\s0"); /* don't let run-on stylings run past structural units */
	source_flush();
	if (cmd==SHORTLINE) linelen=0;
	(*fn)(cmd);
} #define checkcmd(str) strcmp(cmd,str)==0
/*
 * checkcmd(str), defined just above: strcmp() of str against whatever variable
 * named cmd is in scope at the point of use, compared with 0.
 * NOTE(review): the expansion has no outer parentheses, so it is only safe
 * next to operators that bind more loosely than == (as in the || chains and
 * plain assignments that use it); a leading ! would apply to strcmp() alone.
 * NOTE(review): the directive is left exactly where it sits in this copy of
 * the file, which appears to have lost its original line breaks.
 */
int finnf=0; /* NOTE(review): presumably set while in no-fill (.nf) text -- TODO confirm against source_command() */
static void source_line(char *p); /* declared early: source_subfile() below calls it */

/*
 * source_subfile: run the source_gets()/source_line() loop over newin.
 *   newin -- text to process in place of the current input.
 * The input pointer in (declared outside this function) is saved, pointed at
 * newin while the loop runs, and put back afterwards.  sublevel is raised by
 * one for the duration and lowered again on the way out.
 */
static void source_subfile(char *newin) {
	char *p;
	char *oldin = in;
	sublevel++;
	in = newin;
	while ((p=source_gets())!=NULL) { source_line(p); }
	in = oldin;
	sublevel--;
}

/* have to delay acquisition of list tag */ static void source_list(void) { static int oldlisttype; /* OK to have just one because nested lists done with RS/RE */ char *q; int i; /* guard against empty bullet */ for (i=0, q=plain; i or other comment closer, but unlikely */ /* structural commands */ } else if (checkcmd("TH")) { /* sample: .TH CC 1 "Dec 1990" */ /* overrides command line -- should fix this */ if (!finitDoc) { while (isspace(*p)) p++; if (*p) { q=strchr(p, ' '); if (q!=NULL) { *q++='\0'; strcpy(manName, p); for (p=q; isspace(*p); p++) /*nada*/; if (*p) { q=strchr(p,' '); if (q!=NULL) *q++='\0'; strcpy(manSect, p); } } } sI=0; finitDoc=1; (*fn)(BEGINDOC); /* emit information in .TH line? */ } /* else complain about multiple definitions?
*/ } else if (checkcmd("SH") || checkcmd("Sh")) { /* section title */ while (indent) { source_command("RE"); } source_flush(); pop(BEGINSECTION); /* before reset sectheadid */ if (*p) { if (*p=='"') { p++; q=p; while (*q && *q!='"') q++; *q='\0'; } finnf=0; for (j=0; (sectheadid=j) leave to output format */ /* HTML handles tables but not tabs, Tk's text tabs but not tables */ /* does cause a linebreak */ stagadd(BEGINBODY); } else if (checkcmd("ce")) { /* get line count, recursively filter for that many lines */ if (sscanf(p, "%d", &i)) { source_struct(BEGINCENTER); for (; i>0 && (p=source_gets())!=NULL; i--) source_line(p); source_struct(ENDCENTER); } /* limited selection of control structures */ } else if (checkcmd("if") || (checkcmd("ie"))) { /* if cmd, if command and else on next line */ supresseol=1; ie = checkcmd("ie"); mylastif=lastif; if (*p=='!') { invcond=1; p++; } if (*p=='n') { cond=1; p++; } /* masquerading as nroff the right thing to do? */ else if (*p=='t') { cond=0; p++; } else if (*p=='(' || *p=='-' || *p=='+' || isdigit(*p)) { if (*p=='(') p++; nif0=atof(p); if (*p=='-' || *p=='+') p++; while (isdigit(*p)) p++; op = *p++; /* operator: =, >, < */ if (op==' ') { cond = (nif0!=0); } else { nif1=atoi(p); while (isdigit(*p)) p++; if (*p==')') p++; if (op=='=') cond = (nif0==nif1); else if (op=='<') cond = (nif0' -- ignore >=, <= */ cond = (nif0>nif1); } } else if (!isalpha(*p)) { /* usually quote, ^G in Digital UNIX */ /* gobble up comparators between delimiters */ delim = *p++; q = if0; while (*p && *p!=delim) { *q++=*p++; } *q='\0'; p++; q = if1; while (*p && *p!=delim) { *q++=*p++; } *q='\0'; p++; cond = (strcmp(if0,if1)==0); } else cond=0; /* a guess, seems to be right bettern than half the time */ if (invcond) cond=1-cond; while (isspace(*p)) p++; lastif = cond; if (strncmp(p,"\\{",2)==0) { /* rather than handle groups here, have turn on/off output flag? 
*/ p+=2; while (isspace(*p)) p++; /* {{ */ while (strncmp(p,".\\}",3)!=0 || strncmp(p,"\\}",2)!=0 /*Solaris*/) { if (cond) source_line(p); if ((p=source_gets())==NULL) break; } } else if (cond) source_line(p); if (ie) source_line(source_gets()); /* do else part with prevailing lastif */ lastif=mylastif; } else if (checkcmd("el")) { mylastif=lastif; /* should centralize gobbling of groups */ cond = lastif = !lastif; if (strncmp(p,"\\{",2)==0) { p+=2; while (isspace(*p)) p++; while (strncmp(p,".\\}",3)!=0 || strncmp(p,"\\}",2)!=0 /*Solaris*/) { if (cond) source_line(p); if ((p=source_gets())==NULL) break; } } else if (cond) source_line(p); lastif=mylastif; } else if (checkcmd("ig")) { /* "ignore group" */ strcpy(endig,".."); if (*p) { endig[0]='.'; strcpy(&endig[1],p); } while ((p=source_gets())!=NULL) { if (strcmp(p,endig)==0) break; if (!lastif) source_line(p); /* usually ignore line, except in one weird case */ } /* macros and substitutions */ } else if (checkcmd("de")) { /* grab key */ q=p; while (*q && !isspace(*q)) q++; *q='\0'; /* if already have a macro of that name, override it */ /* could use a good dictionary class */ for (insertat=0; insertattblspanmax) tblspanmax=i;*/ tbl[tblc++][i]=""; /* mark end */ if (*p=='.') break; } tbli=0; source_struct(BEGINTABLE); while ((p=source_gets())!=NULL) { if (strncmp(p,".TE",3)==0) break; if (*p=='.') { source_line(p); continue; } /* count number of entries on line. 
if >1, can use to set tableSep */ insertat=0; for (j=0; *tbl[tbli][j]; j++) if (*tbl[tbli][j]!='s') insertat++; if (!tableSep && insertat>1) { if (fsourceTab) tableSep='\t'; else tableSep='@'; } source_struct(BEGINTABLELINE); if (strcmp(p,"_")==0 || /* double line */ strcmp(p,"=")==0) { source_out(" "); /*stagadd(HR);*/ /* empty row -- need ROWSPAN for HTML */ continue; } for (i=0; *tbl[tbli][i] && *p; i++) { tblcellspan=1; tblcellformat = tbl[tbli][i]; if (*tblcellformat=='^') { /* vertical span => blank entry */ tblcellformat="l"; } else if (*tblcellformat=='|') { /* stagadd(VBAR); */ continue; } else if (strchr("lrcn", *tblcellformat)==NULL) { tblcellformat="l"; /*continue;*/ } while (strncmp(tbl[tbli][i+1],"s",1)==0) { tblcellspan++; i++; } source_struct(BEGINTABLEENTRY); if (toupper(tblcellformat[1])=='B') stagadd(BEGINBOLD); else if (toupper(tblcellformat[1])=='I') stagadd(BEGINITALICS); /* not supporting DEC's w() */ if (strcmp(p,"T{")==0) { /* DEC, HP */ while (strncmp(p=source_gets(),"T}",2)!=0) source_line(p); p+=2; if (*p) p++; } else { p = source_out0(p, tableSep); } if (toupper(tblcellformat[1])=='B') stagadd(ENDBOLD); else if (toupper(tblcellformat[1])=='I') stagadd(ENDITALICS); source_struct(ENDTABLEENTRY); } if (tbli+1 lines--on infinite scroll */ }

/*
 * source_line: process one line of man page source.
 *   p -- the line to handle; NULL is accepted and ignored, so callers may pass
 *        the result of source_gets() straight in.
 *
 * Three cases, tested in this order:
 *   - the line starts with a dot: step over the dot, set supresseol and hand
 *     the rest to source_command();
 *   - the line is empty: signal BEGINLINE through source_struct() with ncnt
 *     set to 1 for the duration of the call;
 *   - anything else: send the text through source_out(), then signal
 *     SHORTLINE when finnf is set or the line began with whitespace.
 * Afterwards, unless supresseol or finnf is set, a single space is output and,
 * when finlist is set, source_list() is called.  supresseol is cleared before
 * returning.
 *
 * NOTE(review): the two #if 0 ... #endif spans below look like switched-off
 * tab-to-table handling; they are left exactly where they sit in this copy of
 * the file, which appears to have lost its original line breaks.
 * NOTE(review): isspace() is called on a plain char; confirm the input cannot
 * hold bytes above 0x7f on platforms where char is signed.
 */
static void source_line(char *p) {
	/*stagadd(BEGINLINE);*/
	char *cmd=p; /* remembers the start of the line; p itself is advanced for commands */
	if (p==NULL) return; /* bug somewhere else, but where? */ #if 0 if (*p!='.' && *p!='\'' && !finlist) { if (fsourceTab && !fosourceTab) { tblc=1; tbli=0; tableSep='\t'; tbl[0][0]=tbl[0][1]=tbl[0][2]=tbl[0][3]=tbl[0][4]=tbl[0][5]=tbl[0][6]=tbl[0][7]=tbl[0][8]="l"; source_struct(BEGINTABLE); finTable=1; } else if (!fsourceTab && fosourceTab) { source_struct(ENDTABLE); finTable=0; } fosourceTab=fsourceTab; } #endif
	if (*p=='.' /*|| *p=='\'' -- normalized */) { /* command == starts with "." */
		p++;
		supresseol=1;
		source_command(p);
	} else if (!*p) { /* blank line */
		/*source_command("P");*/
		ncnt=1;
		source_struct(BEGINLINE);
		ncnt=0; /* empty line => paragraph break */ #if 0 } else if (fsourceTab && !finlist /* && pmode */) { /* can't handle tabs, so try tables */ source_struct(BEGINTABLE); tblcellformat = "l"; do { source_struct(BEGINTABLELINE); while (*p) { source_struct(BEGINTABLEENTRY); p = source_out0(p, '\t'); source_struct(ENDTABLEENTRY); } source_struct(ENDTABLELINE); } while ((p=source_gets())!=NULL && fsourceTab); source_struct(ENDTABLE); source_line(p); #endif
	} else { /* otherwise normal text */
		source_out(p);
		/* cmd still points at the first character of the line */
		if (finnf || isspace(*cmd)) source_struct(SHORTLINE);
	}
	/* separate this line from the next with one space unless suppressed */
	if (!supresseol && !finnf) {
		source_out(" ");
		if (finlist) source_list();
	}
	supresseol=0;
	/*stagadd(ENDLINE);*/
}

static void source_filter(void) { char *p = in, *q; char *oldv,*newv,*shiftp,*shiftq,*endq; int lenp,lenq; int i,on1,on2,nn1,nn2,first; int insertcnt=0, deletecnt=0, insertcnt0; int nextDiffLine=-1; char diffcmd, tmpc, tmpendq; AbsLine=0; /* just count length of macro table! */ for (i=0; macro[i].key!=NULL; i++) /*empty*/; macrocnt = i; /* {{ dumb Digital puts \\} closers on same line */ for (p=in; (p=strstr(p," \\}"))!=NULL; p+=3) *p='\n'; sI=0; /* (*fn)(BEGINDOC); -- done at .TH or first .SH */ /* was: source_subfile(in); */ while (fDiff && fgets(diffline, MAXBUF, difffd)!=NULL) { /* requirements: no context lines, no errors in files, ... change-command: 8a12,15 or 5,7c8,10 or 5,7d3 < from-file-line < from-file-line... -- > to-file-line > to-file-line... */ for (q=diffline; ; q++) { diffcmd=*q; if (diffcmd=='a'||diffcmd=='c'||diffcmd=='d') break; } if (sscanf(diffline, "%d,%d", &on1,&on2)==1) on2=on1-1+(diffcmd=='d'||diffcmd=='c'); if (sscanf(++q, "%d,%d", &nn1,&nn2)==1) nn2=nn1-1+(diffcmd=='a'||diffcmd=='c'); deletecnt = on2-on1+1; insertcnt = nn2-nn1+1; nextDiffLine = nn1; /*assert(nextDiffLine>=AbsLine); -- can happen if inside a macro?
*/ if (nextDiffLine */ do { p = oldv = fgets(diffline, MAXBUF, difffd); p[strlen(p)-1]='\0'; /* fgets's \n ending => \0 */ deletecnt--; } while (deletecnt && *p=='.'); /* throw out commands in old version */ q = newv = source_gets(); insertcnt--; while (insertcnt && *q=='.') { source_line(q); insertcnt--; } if (*p=='.' || *q=='.') break; /* make larger chunk for better diff -- but still keep away from commands */ lenp=strlen(p); lenq=strlen(q); while (deletecnt && MAXBUF-lenq>80*2) { fgetc(difffd); fgetc(difffd); /* skip '<' */ if (ungetc(fgetc(difffd),difffd)=='.') break; p=&diffline[lenp]; *p++=' '; lenp++; fgets(p, MAXBUF-lenp, difffd); p[strlen(p)-1]='\0'; lenp+=strlen(p); deletecnt--; } while (insertcnt && *in!='.' && MAXBUF-lenq>80*2) { if (newv!=diffline2) { strcpy(diffline2,q); newv=diffline2; } q=source_gets(); diffline2[lenq]=' '; lenq++; strcpy(&diffline2[lenq],q); lenq+=strlen(q); insertcnt--; } /* common endings */ p = &p[strlen(oldv)]; q=&q[strlen(newv)]; while (p>oldv && q>newv && p[-1]==q[-1]) { p--; q--; } if ((p>oldv && p[-1]=='\\') || (q>newv && q[-1]=='\\')) while (*p && *q && !isspace(*p)) { p++; q++; } /* steer clear of escapes */ tmpendq=*q; *p=*q='\0'; endq=q; p=oldv; q=newv; while (*p && *q) { /* common starts */ newv=q; while (*p && *q && *p==*q) { p++; q++; } if (q>newv) { tmpc=*q; *q='\0'; source_line(newv); *q=tmpc; } /* too hard to read */ /* difference: try to find hunk of p in remainder of q */ if (strlen(p)<15 || (shiftp=strchr(&p[15],' ') /*|| shiftp-p>30*/)==NULL) break; shiftp++; /* include the space */ tmpc=*shiftp; *shiftp='\0'; shiftq=strstr(q,p); *shiftp=tmpc; /* includes space */ if (shiftq!=NULL) { /* call that part of q inserted */ tmpc=*shiftq; *shiftq='\0'; stagadd(BEGINDIFFA); source_line(q); stagadd(ENDDIFFA); source_line(" "); *shiftq=tmpc; q=shiftq; } else { /* call that part of p deleted */ shiftp--; *shiftp='\0'; /* squash the trailing space */ stagadd(BEGINDIFFD); source_line(p); stagadd(ENDDIFFD); source_line(" 
"); p=shiftp+1; } /*#endif*/ } if (*p) { stagadd(BEGINDIFFD); source_line(p); stagadd(ENDDIFFD); } if (*q) { stagadd(BEGINDIFFA); source_line(q); stagadd(ENDDIFFA); } if (tmpendq!='\0') { *endq=tmpendq; source_line(endq); } source_line(" "); } /* even if diffcmd=='c', could still have remaining old version lines */ first=1; while (deletecnt--) { fgets(diffline, MAXBUF, difffd); if (diffline[2]!='.') { if (first) { stagadd(BEGINDIFFD); first=0; } source_line(&diffline[2]); /* don't do commands; skip initial '<' */ } } if (!first) { stagadd(ENDDIFFD); source_line(" "); } /* skip over duplicated from old */ if (diffcmd=='c') while (insertcnt0--) fgets(diffline, MAXBUF, difffd); /* even if diffcmd=='c', could still have remaining new version lines */ first=1; nextDiffLine = AbsLine + insertcnt; while (insertcnt--) fgets(diffline, MAXBUF, difffd); /* eat duplicate text of above */ while (/*insertcnt--*/AbsLine" }, { 'S', 0, "source", "(ource of man page passed in)" }, /* autodetected */ { 'F', 0, "formatted:format", "(ormatted man page passed in)" }, /* autodetected */ { 'r', 1, "reference:manref:ref", " " }, { 'l', 1, "title", " " }, { 'V', 1, "volumes:vol", "(olume) <colon-separated list>" }, { 'U', 0, "url:urls", "(RLs as hyperlinks)" }, /* following options apply to formatted pages only */ { 'b', 0, "subsections:sub", " (show subsections)" }, { 'k', 0, "keep:head:foot:header:footer", "(eep head/foot)" }, { 'n', 1, "name", "(ame of man page) <string>" }, { 's', 1, "section:sect", "(ection) <string>" }, { 'p', 0, "paragraph:para", "(aragraph mode toggle)" }, { 't', 1, "tabstop:tabstops", "(abstops spacing) <number>" }, { 'N', 0, "normalize:normal", "(ormalize spacing, changebars)" }, { 'y', 0, "zap:nohyphens", " (zap hyphens toggle)" }, { 'K', 0, "nobreak", " (declare that page has no breaks)" }, /* autodetected */ { 'd', 1, "diff", "(iff) <file> (diff of old page source to incorporate)" }, { 'M', 1, "message", "(essage) <text> (included verbatim at end of Name 
section)" }, /*{ 'l', 0, "number lines", "... can number lines in a pipe" } */ /*{ 'T', 0, "tables", "(able agressive parsing ON)" },*/ /* { 'c', 0, "changeleft:changebar", "(hangebarstoleft toggle)" }, -- default is perfect */ /*{ 'R', 0, "reflow", "(eflow text lines)" },*/ { 'R', 1, "rebus", "(ebus words for TkMan)" }, { 'C', 0, "TclTk", " (enable Tcl/Tk formatting)" }, /* autodetected */ /*{ 'D', 0, "debug", "(ebugging mode)" }, -- dump unrecognized macros, e.g.*/ { 'o', 0, "noop", " (no op)" }, { 'O', 0, "noop", " <arg> (no op with arg)" }, { 'q', 0, "quiet", "(uiet--don't report warnings)" }, { 'h', 0, "help", "(elp)" }, /*{ '?', 0, "help", " (help)" }, -- getopt returns '?' as error flag */ { 'v', 0, "version", "(ersion)" }, { '\0', 0, "", NULL } }; /* calculate strgetopt from options list */ for (i=0,p=strgetopt; option[i].letter!='\0'; i++) { *p++ = option[i].letter; /* check for duplicate option letters */ assert(strchr(strgetopt,option[i].letter)==&p[-1]); if (option[i].arg) *p++=':'; } *p='\0'; /* spot check construction of strgetopt */ assert(p<strgetopt+80); assert(strlen(strgetopt)>10); assert(strchr(strgetopt,'f')!=NULL); assert(strchr(strgetopt,'v')!=NULL); assert(strchr(strgetopt,':')!=NULL); #ifdef macintosh extern void InitToolbox(); InitToolbox(); #endif /* count, sort exception strings */ for (lcexceptionslen=0; (p=lcexceptions[lcexceptionslen])!=NULL; lcexceptionslen++) /*empty*/; qsort(lcexceptions, lcexceptionslen, sizeof(char*), lcexceptionscmp); /* map long option names to single letters for switching */ /* (GNU probably has a reusable function to do this...) */ /* deep six getopt in favor of integrated long names + letters? */ argvch = malloc(argc * sizeof(char*)); p = argvbuf = malloc(argc*3 * sizeof(char)); /* either -<char>'\0' or no space used */ for (i=0; i<argc; i++) argvch[i]=argv[i]; /* need argvch[0] for getopt? 
*/ argv0 = mystrdup(argv[0]); for (i=1; i<argc; i++) { if (argv[i][0]=='-' && argv[i][1]=='-') { if (argv[i][2]=='\0') break; /* end of options */ for (j=0; option[j].letter!='\0'; j++) { if (strcoloncmp2(&argv[i][2],'\0',option[j].longnames,0)) { argvch[i] = p; *p++ = '-'; *p++ = option[j].letter; *p++ = '\0'; if (option[j].arg) i++; /* skip arguments of options */ break; } } if (option[j].letter=='\0') fprintf(stderr, "%s: unknown option %s\n", argv[0], argv[i]); } } /* pass through options to set defaults for chosen format */ setFilterDefaults("ASCII"); /* default to ASCII (used by TkMan's Glimpse indexing */ /* initialize header/footer buffers (save room in binary) */ for (i=0; i<CRUFTS; i++) { *cruft[i] = '\0'; } /* automatically done, guaranteed? */ /*for (i=0; i<MAXLINES; i++) { linetabcnt[i] = 0; } */ while ((c=getopt(argc,argvch,strgetopt))!=-1) { switch (c) { case 'k': fHeadfoot=1; break; case 'b': fSubsections=1; break; /* case 'c': fChangeleft=1; break; -- obsolete */ /* case 'R': fReflow=1; break;*/ case 'n': strcpy(manName,optarg); fname=1; break; /* name & section for when using stdin */ case 's': strcpy(manSect,optarg); break; /*case 'D': docbookpath = optarg; break;*/ case 'V': vollist = optarg; break; case 'l': manTitle = optarg; break; case 'r': manRef = optarg; if (strlen(manRef)==0 || strcmp(manRef,"-")==0 || strcmp(manRef,"off")==0) fmanRef=0; break; case 't': TabStops=atoi(optarg); break; /*case 'T': fTable=1; break; -- if preformatted doesn't work, if source automatic */ case 'p': fPara=!fPara; break; case 'K': fFoot=1; break; case 'y': fNOHY=1; break; case 'N': fNORM=1; break; case 'f': /* set format */ if (setFilterDefaults(optarg)) { fprintf(stderr, "%s: unknown format: %s\n", argv0, optarg); exit(1); } break; case 'F': fSource=0; break; case 'S': fSource=1; break; case 'd': difffd = fopen(optarg, "r"); if (difffd==NULL) { fprintf(stderr, "%s: can't open %s\n", argv0, optarg); exit(1); } /* read in a line at a time diff = filesuck(fd); 
fclose(fd); */ fDiff=1; break; case 'M': message = optarg; break; case 'C': fTclTk=1; break; case 'R': p = malloc(strlen(optarg)+1); strcpy(p, optarg); /* string may not be in writable address space */ oldp = ""; for (; *p; oldp=p, p++) { if (*oldp=='\0') rebuspat[rebuspatcnt++] = p; if (*p=='|') *p='\0'; } for (i=0; i<rebuspatcnt; i++) rebuspatlen[i] = strlen(rebuspat[i]); /* for strnlen() */ break; case 'q': fQuiet=1; break; case 'o': /*no op*/ break; case 'O': /* no op with arg */ break; case 'h': printf("rman"); helplen=strlen("rman"); /* linebreak options */ assert(helplen>0); for (i=0; option[i].letter!='\0'; i++) { desclen = strlen(option[i].desc); if (helplen+desclen+5 > helpbreak) { printf("\n%*s",helpispace,""); helplen=helpispace; } printf(" [-%c%s]", option[i].letter, option[i].desc); helplen += desclen+5; } if (helplen>helpispace) printf("\n"); printf("%*s [<filename>]\n",helpispace,""); exit(0); case 'v': /*case '?':*/ printf("PolyglotMan v" POLYGLOTMANVERSION "\n"); exit(0); default: fprintf(stderr, "%s: unidentified option -%c (-h for help)\n",argvch[0],c); exit(2); } } /* read from given file name(s) */ if (optind<argc) { processing = argvch[optind]; if (!fname) { /* if no name given, create from file name */ /* take name from tail of path */ if ((p=strrchr(argvch[optind],'/'))!=NULL) p++; else p=argvch[optind]; strcpy(manName,p); /* search backward from end for final dot. 
split there */ if ((p=strrchr(manName,'.'))!=NULL) { strcpy(manSect,p+1); *p='\0'; } } strcpy(plain,argvch[optind]); if (freopen(argvch[optind], "r", stdin)==NULL) { fprintf(stderr, "%s: can't open %s\n", argvch[0],argvch[optind]); exit(1); } } /* need to read macros, ok if fail; from /usr/lib/tmac/an => needs to be set in Makefile, maybe a searchpath */ /* if ((macros=fopen("/usr/lib/tmac/an", "r"))!=NULL) { in = File = filesuck(macros); lookahead = File[0]; source_filter(); free(File); } */ /* suck in whole file and just operate on pointers */ in = File = filesuck(stdin); /* minimal check for roff source: first character dot command or apostrophe comment */ /* MUST initialize lookahead here, BEFORE first call to la_gets */ if (fSource==-1) { lookahead = File[0]; fSource = (lookahead=='.' || lookahead=='\'' || /*dumb HP*/lookahead=='/' /* HP needs this too but causes problems || isalpha(lookahead)--use --source flag*/); } if (fDiff && (!fSource || fn!=HTML)) { fprintf(stderr, "diff incorporation supported for man page source, generating HTML\n"); exit(1); } if (fSource) source_filter(); else preformatted_filter(); if (fDiff) fclose(difffd); /*free(File); -- let system clean up, perhaps more efficiently */ return 0; }