--- mawk-1.3.3/rexp/rexp0.c.orig	2006-11-01 09:06:13.000000000 -0500
+++ mawk-1.3.3/rexp/rexp0.c	2006-11-01 09:26:03.000000000 -0500
@@ -2,6 +2,7 @@
 /********************************************
 rexp0.c
 copyright 1991, Michael D. Brennan
+copyright 2006, Sam Trenholme
 
 This is a source file for mawk, an implementation of
 the AWK programming language.
@@ -11,6 +12,9 @@
 ********************************************/
 
 /*$Log: rexp0.c,v $
+ *Revision 1.6  2006/11/01 08:00:12  sam
+ *Added support for POSIX character classes (POSIX 9.3.5.6)
+ *
  *Revision 1.5  1996/11/08 15:39:27  mike
  *While cleaning up block_on, I introduced a bug. Now fixed.
  *
@@ -355,6 +359,7 @@
    char *q, *t ;
    int cnt ;
    int comp_flag ;
+   int posix_class;
 
    p = t = (*start) + 1 ;
 
@@ -364,27 +369,50 @@
    if (*p == ']') p++ ;
    else if (*p == '^' && *(p + 1) == ']')  p += 2 ;
 
-   while (1)			/* find the back of the class */
+   q = p;
+   prev = -1;
+   posix_class = 0;
+   cnt = 0;
+
+   while(*q != '\0') /* find the back of the class */
    {
-      if (!(q = strchr(p, ']')))
+      if(prev == '[' && *q == ':') /* POSIX character class */
       {
-	 /* no closing bracket */
-	 RE_error_trap(-E3) ;
+         posix_class = 1;
       }
-      p = q - 1 ;
-      cnt = 0 ;
-      while (*p == '\\')
+      if(*q == ']' && prev != ':' && ((cnt & 1) == 0)) 
       {
-	 cnt++ ; p-- ; 
+         break;
       }
-      if ((cnt & 1) == 0)
+      if(*q == ']' && prev == ':' && posix_class == 0)
       {
-	 /* even number of \ */
-	 break ;
+         break;
+      }
+      if(prev == ':' && *q == ']') 
+      {
+         posix_class = 0;
+      }
+      if(*q == '\\' && prev != '\\') 
+      {
+         cnt = 1;
       }
-      p = q + 1 ;
+      if(*q == '\\' && prev == '\\')
+      {
+         cnt++;
+      }
+      if(*q != '\\') 
+      {
+         cnt = 0;
+      }
+      prev = *q;
+      q++;
    }
 
+   if(*q != ']') 
+   {
+      RE_error_trap(-E3) ;
+   }
+        
    /*  q  now  pts at the back of the class   */
    p = t ;
    *start = q + 1 ;
@@ -447,6 +475,105 @@
 	    }
 	    break ;
 
+	 case '[':
+             /* This is code to make Mawk more 
+                POSIX compliant.  Basically, POSIX has the following
+	        character class expressions:
+
+                [:alnum:] -> A-Za-z0-9
+                [:alpha:] -> A-Za-z
+                [:blank:] -> space, tab
+                [:cntrl:] -> Control characters (ASCII 1-31; 127)
+                [:digit:] -> 0-9
+                [:graph:] -> !-~ (printable and visible)
+                [:lower:] -> a-z
+                [:print:] -> non-control characters (ASCII 32-126)
+                [:punct:] -> !-/:-@\[-`\{-~
+                [:space:] -> space, tab, formfeed only (POSIX's \n, \r, \v not set)
+                [:upper:] -> A-Z
+                [:xdigit:] -> A-Fa-f0-9 (hex digit)
+               
+	        Note that this code does not give Mawk the ability
+	        to handle non-C/ASCII locales; all this does is allow
+	        Mawk to handle the POSIX character class expressions.
+                This lets a case-sensitive regular expression written
+                for Gawk with these classes (so that it does not break
+                in non-C locales) also work unchanged in Mawk.
+              */
+	    if(strncmp(p,"[:upper:]",9) == 0) {
+                block_on(*bvp,'A','Z');
+                prev = -1;
+		p += 9;
+                break;
+            } else if (strncmp(p,"[:lower:]",9) == 0) {
+                block_on(*bvp,'a','z');
+                prev = -1;
+		p += 9;
+                break;
+            } else if (strncmp(p,"[:alnum:]",9) == 0) {
+                block_on(*bvp,'a','z');
+                block_on(*bvp,'A','Z');
+                block_on(*bvp,'0','9');
+                prev = -1;
+		p += 9;
+                break;
+            } else if (strncmp(p,"[:alpha:]",9) == 0) {
+                block_on(*bvp,'a','z');
+                block_on(*bvp,'A','Z');
+                prev = -1;
+		p += 9;
+                break;
+            } else if (strncmp(p,"[:digit:]",9) == 0) {
+                block_on(*bvp,'0','9');
+                prev = -1;
+		p += 9;
+                break;
+            } else if (strncmp(p,"[:graph:]",9) == 0) {
+                block_on(*bvp,'!','~');
+                prev = -1;
+		p += 9;
+                break;
+            } else if (strncmp(p,"[:print:]",9) == 0) {
+                block_on(*bvp,32,126);
+                prev = -1;
+		p += 9;
+                break;
+            } else if (strncmp(p,"[:punct:]",9) == 0) {
+                block_on(*bvp,'!','/');
+                block_on(*bvp,':','@');
+                block_on(*bvp,'[','`');
+                block_on(*bvp,'{','~');
+                prev = -1;
+		p += 9;
+                break;
+            } else if (strncmp(p,"[:blank:]",9) == 0) {
+                on(*bvp,' ');
+                on(*bvp,'\t');
+                prev = -1;
+		p += 9;
+                break;
+            } else if (strncmp(p,"[:space:]",9) == 0) {
+                on(*bvp,' ');
+                on(*bvp,'\t');
+                on(*bvp,'\f');
+                prev = -1;
+		p += 9;
+                break;
+            } else if (strncmp(p,"[:cntrl:]",9) == 0) {
+                block_on(*bvp,1,31);
+                on(*bvp,127);
+                prev = -1;
+		p += 9;
+                break;
+            } else if (strncmp(p,"[:xdigit:]",10) == 0) {
+                block_on(*bvp,'a','f');
+                block_on(*bvp,'A','F');
+                block_on(*bvp,'0','9');
+                prev = -1;
+		p += 10;
+                break;
+            }
+	    /* fall through - no class name matched, so '[' is a literal */
 	 default:
 	    prev = *(unsigned char *) p++ ;
 	    on(*bvp, prev) ;
