红联Linux门户
Linux帮助

如何在C语言中巧用正则表达式

发布时间:2006-05-03 12:12:40 来源:红联 作者:爱的边缘
如果用户熟悉Linux下的sed、awk、grep或vi，那么对正则表达式这一概念肯定不会陌生。由于它可以极大地简化处理字符串时的复杂度，因此现在已经在许多Linux实用工具中得到了应用。千万不要以为正则表达式只是Perl、Python、Bash等脚本语言的专利，作为C语言程序员，用户同样可以在自己的程序中运用正则表达式。

标准的C和C++都不支持正则表达式，但有一些函数库可以辅助C/C++程序员完成这一功能，其中最著名的当数Philip Hazel的Perl-Compatible Regular Expression库，许多Linux发行版本都带有这个函数库。需要注意的是，本文下面介绍的regcomp()、regexec()、regerror()和regfree()属于POSIX标准的正则表达式接口，声明在头文件<regex.h>中，由C函数库直接提供。

编译正则表达式

为了提高效率，在将一个字符串与正则表达式进行比较之前，首先要用regcomp()函数对它进行编译，将其转化为regex_t结构：

/* Compile the regular expression string `regex` into the regex_t pointed to
 * by `preg`; `cflags` controls how the expression is processed.
 * Returns 0 on success; any other value indicates an error. */
int regcomp(regex_t *preg, const char *regex, int cflags);

参数regex是一个字符串，它代表将要被编译的正则表达式；参数preg指向一个声明为regex_t的数据结构，用来保存编译结果；参数cflags决定了正则表达式该如何被处理的细节。

如果函数regcomp()执行成功，并且编译结果被正确填充到preg中后，函数将返回0，任何其它的返回结果都代表有某种错误产生。

匹配正则表达式

一旦用regcomp()函数成功地编译了正则表达式，接下来就可以调用regexec()函数完成模式匹配：

/* Match `string` against the compiled expression `preg`. Up to `nmatch`
 * match positions are written to `pmatch`; `eflags` controls matching
 * details. Returns 0 on a successful match, non-zero otherwise (the sample
 * program below treats REG_NOMATCH as "no match" and anything else as an
 * error). */
int regexec(const regex_t *preg, const char *string, size_t nmatch,regmatch_t pmatch[], int eflags);

/* One match position reported by regexec(): the matched text runs from
 * string + rm_so up to (not including) string + rm_eo. */
typedef struct {

regoff_t rm_so; /* offset of the start of the match within the string */

regoff_t rm_eo; /* offset just past the end of the match */

} regmatch_t;

参数preg指向编译后的正则表达式，参数string是将要进行匹配的字符串，而参数nmatch和pmatch则用于把匹配结果返回给调用程序，最后一个参数eflags决定了匹配的细节。

在调用函数regexec()进行模式匹配的过程中，可能在字符串string中会有多处与给定的正则表达式相匹配，参数pmatch就是用来保存这些匹配位置的，而参数nmatch则告诉函数regexec()最多可以把多少个匹配结果填充到pmatch数组中。当regexec()函数成功返回时，从string+pmatch[0].rm_so到string+pmatch[0].rm_eo是第一个匹配的字符串，而从string+pmatch[1].rm_so到string+pmatch[1].rm_eo，则是第二个匹配的字符串，依此类推。（注：按照POSIX的定义，pmatch[0]对应的是整个正则表达式所匹配到的子串，pmatch[1]及其后的元素依次对应正则表达式中各个括号子表达式所匹配到的子串；未用到的元素其rm_so被置为-1。）

释放正则表达式

无论什么时候，当不再需要已经编译过的正则表达式时，都应该调用函数regfree()将其释放，以免产生内存泄漏。

/* Release the compiled expression previously produced by regcomp() in
 * `preg`. Returns nothing. */
void regfree(regex_t *preg);

函数regfree()不会返回任何结果，它仅接收一个指向regex_t数据类型的指针，这是之前调用regcomp()函数所得到的编译结果。

如果在程序中针对同一个regex_t结构调用了多次regcomp()函数，POSIX标准并没有规定是否每次都必须调用regfree()函数进行释放，但建议每次调用regcomp()函数对正则表达式进行编译后都调用一次regfree()函数，以尽早释放占用的存储空间。

报告错误信息



如果调用函数regcomp()或regexec()得到的是一个非0的返回值，则表明在对正则表达式的处理过程中出现了某种错误，此时可以通过调用函数regerror()得到详细的错误信息。

/* Format the error code `errcode` (returned by regcomp() or regexec()) into
 * `errbuf`, writing at most `errbuf_size` bytes; `preg` is the compiled
 * expression that supplies context for the message.
 * Returns the length of the error message. */
size_t regerror(int errcode, const regex_t *preg, char *errbuf, size_t errbuf_size);

参数errcode是来自函数regcomp()或regexec()的错误代码，而参数preg则是由函数regcomp()得到的编译结果，其目的是把格式化消息所必须的上下文提供给regerror()函数。在执行函数regerror()时，将按照参数errbuf_size指明的最大字节数，在errbuf缓冲区中填入格式化后的错误信息，同时返回错误信息的长度。

应用正则表达式

最后给出一个具体的实例，介绍如何在C语言程序中处理正则表达式。

#include <stdio.h>

#include <string.h>

#include <sys/types.h>

#include <regex.h>

/*
 * Return the substring str[start, end) as a NUL-terminated string.
 *
 * The result is stored in a function-local static buffer, so it is
 * overwritten by the next call and must be consumed before substr() is
 * called again.
 *
 * str   - source string; must be readable from str + start onwards.
 * start - offset of the first character to copy.
 * end   - offset one past the last character to copy.
 *
 * An inverted range (end <= start) yields an empty string, and ranges longer
 * than the buffer are truncated to fit, so the buffer is never overrun.
 */
static char* substr(const char*str, unsigned start, unsigned end)
{
    static char stbuf[256];
    size_t n = 0;

    /* end - start would wrap around in unsigned arithmetic if end < start. */
    if (end > start)
        n = (size_t)(end - start);

    /* Leave room for the terminator; both the copy and the store below
     * must stay inside stbuf. */
    if (n > sizeof(stbuf) - 1)
        n = sizeof(stbuf) - 1;

    /* strncpy stops at a NUL in the source, so a range that extends past
     * the end of str does not read beyond its terminator. */
    strncpy(stbuf, str + start, n);
    stbuf[n] = '\0';

    return stbuf;
}

/*
 * Main program: compile argv[1] as a regular expression, apply it to every
 * line read from standard input, and print each matching line followed by
 * the matched substrings ($0, $1, ...).
 *
 * Returns 0 on success, 1 if the pattern is missing or fails to compile,
 * and 2 if regexec() reports an error other than REG_NOMATCH.
 */
int main(int argc, char** argv)
{
    enum { MAX_MATCHES = 10 };

    const char *pattern;
    int z, lno = 0, cflags = 0;
    char ebuf[128], lbuf[256];
    regex_t reg;
    regmatch_t pm[MAX_MATCHES];
    const size_t nmatch = MAX_MATCHES;

    /* Without this check argv[1] would be NULL and regcomp() would crash. */
    if (argc < 2) {
        fprintf(stderr, "usage: %s pattern < input\n",
                argc > 0 ? argv[0] : "regexp");
        return 1;
    }

    /* Compile the regular expression. */
    pattern = argv[1];
    z = regcomp(&reg, pattern, cflags);
    if (z != 0) {
        regerror(z, &reg, ebuf, sizeof(ebuf));
        fprintf(stderr, "%s: pattern '%s' \n", ebuf, pattern);
        return 1;
    }

    /* Process the input one line at a time. */
    while (fgets(lbuf, sizeof(lbuf), stdin)) {
        size_t len = strlen(lbuf);
        size_t x;

        ++lno;

        /* fgets keeps the trailing newline; strip it before matching. */
        if (len > 0 && lbuf[len - 1] == '\n')
            lbuf[len - 1] = '\0';

        /* Apply the regular expression to this line. */
        z = regexec(&reg, lbuf, nmatch, pm, 0);
        if (z == REG_NOMATCH)
            continue;
        if (z != 0) {
            regerror(z, &reg, ebuf, sizeof(ebuf));
            fprintf(stderr, "%s: regexec('%s')\n", ebuf, lbuf);
            regfree(&reg);
            return 2;
        }

        /* Print the results; an rm_so of -1 marks the first unused slot. */
        for (x = 0; x < nmatch && pm[x].rm_so != -1; ++x) {
            if (x == 0)
                printf("%04d: %s\n", lno, lbuf);
            printf(" $%d='%s'\n", (int)x,
                   substr(lbuf, (unsigned)pm[x].rm_so, (unsigned)pm[x].rm_eo));
        }
    }

    /* Release the compiled regular expression. */
    regfree(&reg);

    return 0;
}

上述程序负责从命令行获取正则表达式，然后将其运用于从标准输入得到的每行数据，并打印出匹配结果。执行下面的命令可以编译并执行该程序：

# gcc regexp.c -o regexp

# ./regexp 'regex[a-z]*' < regexp.c

0003: #include <regex.h>

$0='regex'

0027: regex_t reg;

$0='regex'

0054: z = regexec(&reg, lbuf, nmatch, pm, 0);

$0='regexec'

小结

对那些需要进行复杂数据处理的程序来说，正则表达式无疑是一个非常有用的工具。本文重点在于阐述如何在C语言中利用正则表达式来简化字符串处理，以便在数据处理方面能够获得与Perl语言类似的灵活性。
文章评论

共有 0 条评论