%{
/*
 * lexgrog.l: extract 'whatis' info from nroff man / formatted cat pages.
 *
 * Copyright (C) 1994, 1995 Graeme W. Wilford. (Wilf.)
 *
 * This file is part of man-db.
 *
 * man-db is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * man-db is distributed in the hope that it will be useful, but
 * WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with man-db; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
 *
 * Wed Oct 12 18:46:11 BST 1994 Wilf. (G.Wilford@ee.surrey.ac.uk)
 */

/*
 * NOTE(review): in the copy under review, text between angle brackets
 * appears to have been stripped.  The system #include lines below carry
 * no header name, and the rules carry no <START_CONDITION> prefixes even
 * though seven exclusive start conditions are declared and entered with
 * BEGIN().  The bare "<>" pattern is presumably a damaged "<<EOF>>" rule.
 * Restore these from the upstream file before building; the missing
 * names are deliberately not guessed here.
 */

#undef PROFILE

#ifdef HAVE_CONFIG_H
#  include "config.h"
#endif /* HAVE_CONFIG_H */

#include

#ifndef STDC_HEADERS
extern int errno;
#endif

#ifdef HAVE_UNISTD_H
#  include
#endif /* HAVE_UNISTD_H */

#ifdef HAVE_STRING_H
#  include
#elif defined (HAVE_STRINGS_H)
#  include
#endif /* HAVE_STRING_H */

#include "lib/gettext.h"
#define _(String) gettext (String)

#include "manconfig.h"
#include "lib/error.h"

#define YY_READ_BUF_SIZE 1024
#define MAX_NAME 2048

#ifdef PROFILE
/* Per-rule hit counters, indexed by yy_act.  rule_profile() reads indices
   up to and including YY_NUM_RULES, so one extra slot is required. */
static int ctr[YY_NUM_RULES + 1];
#  define YY_USER_ACTION ++ctr[yy_act];
#endif

static void add_str_to_whatis(const char *string, size_t length);
static void add_char_to_whatis(unsigned char c);
static void newline_found(void);

/* newname accumulates the whatis text; p_name is the write cursor into it
   and fname is the name reported in the truncation warning. */
static char newname[MAX_NAME];
static char *p_name, *fname;
/* One slot per preprocessor filter: find_name() resets every slot to '_'
   and the rules below overwrite a slot when the matching request is seen. */
static char filters[MAX_FILTERS];

#ifdef TEST
#include
char *program_name = "lexgrog";
#endif /* TEST */
%}

%option 8bit batch caseful never-interactive ecs meta-ecs
%option nostdinit
%option noyywrap nounput

%x MAN_NAME
%x CAT_NAME
%x CAT_FILE
%x MAN_FILE
%x CAT_REST
%x MAN_REST
%x FORCE_EXIT

digit           [[:digit:]]
upper           [[:upper:]]
alpha           [[:alpha:]]
blank           [[:blank:]]
blank_eol       [[:blank:]\n]
/* bol: one or more newlines; next: zero or more newlines */
bol             \n+
next            \n*
dbl_quote       \"
font_change     \\f([[:upper:]1-4]|\({upper}{2})
size_change     \\s[+-]?{digit}
style_change    ({font_change}{size_change}?|{size_change}{font_change}?)
typeface        \.(B[IR]?|I[BR]?|R[BI])
sec_request     \.[Ss][HhYySs]
comment         ['.]\\{dbl_quote}
/* Spellings of the NAME heading.  The last class of the first alternative
   lists letters only: '|' is a literal inside a bracket expression, so the
   former [Nn|Ee] also accepted a '|' character. */
name            (N[AaOo][Mm][NnEe]|N[Oo][Mm]|N[Oo][Mm][Bb][Rr][Ee]|B[Ee][Zz][Ee][Ii][Cc][Hh][Nn][Uu][Nn][Gg])
name_sec        {dbl_quote}?{style_change}?{name}{style_change}?{dbl_quote}?
tbl_request     \.[Tt][Ss]
eqn_request     \.[Ee][Qq]
pic_request     \.[Pp][Ss][ 0-9.<]?
ref1_request    \.[Rr]1
ref2_request    \.\[

%%

 /* begin NAME section processing */
{sec_request}{blank_eol}+{name_sec}{blank}*     BEGIN(MAN_NAME);
(\n{blank}*){2,}{name}{blank}*\n{blank}+        BEGIN(CAT_NAME);

 /* general text matching */
\.[^Ss].*{next}                                 |
\..{0,3}{dbl_quote}?.{0,4}{dbl_quote}?          |
{comment}.*{next}                               |
.{1,9}                                          |
[ ]*                                            |
\n{2,}                                          |
.|\n

 /* record which preprocessor requests occur;
    NOTE(review): the start-condition list that presumably preceded this
    and the later bare '{' scopes is missing from this copy */
{
    {bol}{tbl_request}      filters[TBL_FILTER] = 't';
    {bol}{eqn_request}      filters[EQN_FILTER] = 'e';
    {bol}{pic_request}      filters[PIC_FILTER] = 'p';
    {bol}{ref1_request}     |
    {bol}{ref2_request}     filters[REF_FILTER] = 'r';
}

<>                      {       /* exit */
                                *p_name = '\0'; /* terminate the string */
                                yyterminate();
                        }

 /* rules to end NAME section processing */
.|\n                    {       /* forced exit */
                                *p_name = '\0'; /* terminate the string */
                                yyterminate();
                        }

{bol}{sec_request}{blank}*      |       /* Another section */
{bol}\.X{upper}{blank}+         |       /* special - hpux */
{bol}\.sp{blank}*               |       /* vertical spacing */
\n{blank}*{bol}.+               {       /* terminate the string */
                                        *p_name = '\0';
                                        BEGIN(MAN_REST);
                                }

{bol}S[yYeE]            |
\n{2,}.+                |
{next}__                {       /* terminate the string */
                                *p_name = '\0';
                                BEGIN(CAT_REST);
                                yyterminate();
                        }

 /* ROFF request removal */
{
    {bol}{typeface}{blank}+         |       /* type face commands */
    {bol}\.IX{blank}.*              |       /* .IX line */
    {bol}{comment}.*                {       /* per line comments */
                                            newline_found();
                                    }
}

-\n[ \t\n]*             /* strip continuations */

 /* convert to DASH */
{next}{blank}*\\\((mi|hy|em){blank}*    |
{next}[ \t\n]+[-\\]-{blank}*            |
{next}[ \t\n]*[-\\]-{blank}+            |
{next}{blank}+-{1,2}[ \t\n]+            |
{bol}\.Nd{blank}*                       add_str_to_whatis(" - ", 3);

 /* escape sequences and special characters */
{
    {next}\\[\\e]                   add_char_to_whatis('\\');
    {next}\\('|\(aa)                add_char_to_whatis('\'');
    {next}\\(`|\(ga)                add_char_to_whatis('`');
    {next}\\-                       add_char_to_whatis('-');
    {next}\\\.                      add_char_to_whatis('.');
    {next}((\\[ 0t])|[ ]|\t)*       add_char_to_whatis(' ');
    {next}\\\((ru|ul)               add_char_to_whatis('_');
    {next}\\\\t                     add_char_to_whatis('\t');

    {next}\\[|^&!%acdpruz{}\n]      /* various useless control chars */
    {next}\\[bhlLvx]{blank}*'[^']+' /* various inline functions */

    {next}\\\$[1-9]                 /* interpolate arg */

    {next}\\\*(\({alpha})?{alpha}   /* interpolate string */
    {next}\\\({alpha}{alpha}        /* special (non printable) character */
    {next}\\\".*                    /* comment */

    {next}{font_change}             /* font changes */
    {next}\\k{alpha}                /* mark input place in register */

    {next}\\n(\({alpha})?{alpha}    /* interpolate number register */
    {next}\\o\"[^"]+\"              /* overstrike chars */

    {next}{size_change}             /* size changes */
    {next}\\w{blank}*'[^']+'[^ \t]* /* width of string */

    {next}\\                        /* catch all */

    {next}\(\\\|\){blank}*          /* function() in hpux */
}

 /* collapse spaces, escaped spaces, tabs, newlines to a single space */
{next}((\\[ ])|{blank})*        add_char_to_whatis(' ');

 /* a ROFF break request (.br) usually means we have multiple whatis
    definitions, provide a separator for later processing */
{bol}\.br{blank}*               add_char_to_whatis((char) 0x11);

{bol}\.{alpha}{2}{blank}*       {       /* very general roff requests */
                                        newline_found();
                                }

 /* pass words as a chunk. speed optimization */
[[:alnum:]]*                    add_str_to_whatis(yytext, yyleng);

 /* normalise the comma (,) separators */
{blank}*,[ \t\n]*               |
{blank}*,{blank}*               add_str_to_whatis(", ", 2);

{bol}.                  {
                                newline_found();
                                add_char_to_whatis(yytext[yyleng - 1]);
                        }

. 
add_char_to_whatis(*yytext);

 /* default EOF rule; a nonzero return makes find_name() report failure.
    NOTE(review): "<>" is presumably a damaged "<<EOF>>" pattern -- confirm
    against the upstream file */
<>                              return 1;

%%

/* print warning and force scanner to terminate */
static void too_big(void)
{
	error(0, 0,
	      _( "warning: whatis for %s exceeds %d bytes, truncating."),
	      fname, MAX_NAME);

	/* every later match is taken in FORCE_EXIT, whose rule terminates
	   the string and the scan */
	BEGIN(FORCE_EXIT);
}

/*
 * Append `length' bytes of `string' to newname if there is enough room,
 * always keeping one byte free for the terminator written by the rules.
 * When the text would not fit, nothing is copied and too_big() is called.
 */
static void add_str_to_whatis(const char *string, size_t length)
{
	size_t used = (size_t) (p_name - newname);

	if (used + length >= MAX_NAME) {
		too_big();
		return;
	}

	/* a plain fixed-length copy: the count is explicit, so there is no
	   need for strncpy's stop-at-NUL and zero-padding behaviour */
	memcpy(p_name, string, length);
	p_name += length;
}

/* append a char to newname if enough room, else call too_big() */
static void add_char_to_whatis(unsigned char c)
{
	if (p_name - newname + 1 >= MAX_NAME) {
		too_big();
		return;
	}

	*p_name++ = c;
}

/* If we are mid p_name and the last added char was not a space,
   best add one */
static void newline_found(void)
{
	if (p_name != newname && p_name[-1] != ' ')
		add_char_to_whatis(' ');
}

/*
 * Scan `file' and build its whatis string.
 *
 * file     - path opened for reading and handed to the scanner
 * filename - name stored in fname; used in the truncation warning
 * catfile  - nonzero starts the scanner in CAT_FILE, zero in MAN_FILE
 *
 * Returns NULL when the file cannot be opened or when yylex() returns
 * nonzero.  Otherwise returns the result of strappend() applied to
 * "§", the filter flags, "§" and the collected text with its leading
 * spaces skipped.
 */
char *find_name(char *file, char *filename, short catfile)
{
	char filter_str[MAX_FILTERS + 1];
	int ret;

	yyin = fopen(file, "r");
	if (yyin == NULL) {
		error (0, errno, _( "can't open %s"), file);
		return NULL;
	}

	fname = filename;
	*(p_name = newname) = '\0';
	memset(filters, '_', sizeof(filters));

	if (catfile)
		BEGIN(CAT_FILE);
	else
		BEGIN(MAN_FILE);

	yyrestart(yyin);
	ret = yylex();
	fclose(yyin);

	if (ret)
		return NULL;

	/* wipe out any initial spaces */
	for (p_name = newname; *p_name == ' '; p_name++)
		;

	/* memset() above fills every byte of filters, so the array itself
	   holds no terminator; hand strappend() a terminated copy instead */
	memcpy(filter_str, filters, sizeof(filters));
	filter_str[sizeof(filters)] = '\0';

	/* prepend filters to whatis; the trailing argument ends the list and
	   must be a real null pointer, not an int 0
	   NOTE(review): assumes strappend() takes char * arguments ended by a
	   null pointer, as the call shape suggests -- confirm its prototype */
	return strappend(NULL, "§", filter_str, "§", p_name, (char *) NULL);
}

#ifdef TEST
/* Print one labelled find_name() result; a NULL result is shown as
   "(null)" rather than being passed to %s.
   NOTE(review): the returned string is not released here; confirm whether
   strappend() allocates it. */
static void show_whatis(const char *label, char *file, short catfile)
{
	char *whatis = find_name(file, "-", catfile);

	printf("%s: \"%s\"\n", label, whatis ? whatis : "(null)");
}

/* test driver: run every argument through both scanner modes */
int main(int argc, char **argv)
{
	if (argc > 1)
		while (*++argv) {
			show_whatis("man", *argv, 0);
			show_whatis("cat", *argv, 1);
		}
	return 0;
}
#endif

#ifdef PROFILE
/*
 * Print how often each rule fired.  Counters 1 and 2 belong to the two
 * rules that enter the NAME section; the remaining rules are listed only
 * when they fired at least once.
 * NOTE(review): the loop's upper bound is inclusive, so ctr must provide
 * at least YY_NUM_RULES + 1 elements -- check its declaration.
 */
void rule_profile(void)
{
	int i, tot = 0;

	printf("found NAME in %d man, %d cat pages\n", ctr[1], ctr[2]);
	for (i = 3; i <= YY_NUM_RULES; i++) {
		if (ctr[i]) {
			printf("rule[%d]: %d\n", i, ctr[i]);
			tot += ctr[i];
		}
	}
	printf("Total rules executed: %d\n", tot);
}
#else
/* profiling disabled: nothing to report */
void rule_profile(void){}
#endif