string.h

Go to the documentation of this file.
00001 #ifndef ETHERBOOT_BITS_STRING_H
00002 #define ETHERBOOT_BITS_STRING_H
00003 /*
00004  * Taken from Linux /usr/include/asm/string.h
00005  * All except memcpy, memmove, memset, strncmp and strlen removed.
00006  *
00007  * Non-standard memswap() function added because it saves quite a bit
00008  * of code (mbrown@fensystems.co.uk).
00009  */
00010 
00011 /*
00012  * This string-include defines all string functions as inline
00013  * functions. Use gcc. It also assumes ds=es=data space, this should be
00014  * normal. Most of the string-functions are rather heavily hand-optimized,
00015  * see especially strtok,strstr,str[c]spn. They should work, but are not
00016  * very easy to understand. Everything is done entirely within the register
00017  * set, making the functions fast and clean. String instructions have been
00018  * used through-out, making for "slightly" unclear code :-)
00019  *
00020  *              NO Copyright (C) 1991, 1992 Linus Torvalds,
00021  *              consider these trivial functions to be PD.
00022  */
00023 
00024 FILE_LICENCE ( PUBLIC_DOMAIN );
00025 
/*
 * Marker that this header supplies an architecture-specific memcpy().
 * NOTE(review): presumably tested by generic string code outside this
 * file -- confirm against the consumer.
 */
00026 #define __HAVE_ARCH_MEMCPY
00027 
/*
 * Out-of-line copy routine.  Within this file it is used by the memcpy()
 * macro when the length is not a compile-time constant, and by
 * __constant_memcpy() for constant lengths of 26 bytes or more.  Its
 * definition is not part of this file.
 */
00028 extern void * __memcpy ( void *dest, const void *src, size_t len );
00029 
/*
 * Disabled (#if 0) inline variant of __memcpy(), kept for reference.  It
 * issues a single "rep movsb" with the count in %ecx, the source in %esi
 * and the destination in %edi, and returns the original dest pointer.
 * d0/d1/d2 only exist to tell the compiler that those three registers
 * are modified by the instruction.
 */
00030 #if 0
00031 static inline __attribute__ (( always_inline )) void *
00032 __memcpy ( void *dest, const void *src, size_t len ) {
00033         int d0, d1, d2;
00034         __asm__ __volatile__ ( "rep ; movsb"
00035                                : "=&c" ( d0 ), "=&S" ( d1 ), "=&D" ( d2 )
00036                                : "0" ( len ), "1" ( src ), "2" ( dest )
00037                                : "memory" );
00038         return dest; 
00039 }
00040 #endif
00041 
/*
 * Copy a block of memory, choosing the copy strategy from the length.
 *
 * dest: destination buffer
 * src:  source buffer
 * len:  number of bytes to copy
 *
 * Returns dest.
 *
 * Reached through the memcpy() macro when len is a compile-time
 * constant; the function is always_inline, presumably so that only the
 * code for the selected length remains after inlining.
 */
static inline __attribute__ (( always_inline )) void *
__constant_memcpy ( void *dest, const void *src, size_t len ) {
	union {
		uint32_t u32[2];
		uint16_t u16[4];
		uint8_t  u8[8];
	} __attribute__ (( __may_alias__ )) *to = dest;
	const union {
		uint32_t u32[2];
		uint16_t u16[4];
		uint8_t  u8[8];
	} __attribute__ (( __may_alias__ )) *from = src;
	/* Running pointers for the string instructions (%esi / %edi) */
	const void *src_pos = src;
	void *dest_pos = dest;

	/*
	 * Short lengths are copied with one or two plain register moves.
	 * These are always better than a string operation: any two
	 * registers may be clobbered (data, source; dest can re-use the
	 * source register) instead of being tied to esi and edi, and
	 * there is much more scope for optimising with nearby code.
	 *
	 * The "N bytes" notes are the original author's annotations;
	 * they appear to be generated code sizes, not copy lengths.
	 */
	switch ( len ) {
	case 0 : /* 0 bytes */
		return dest;

	/* Single-register moves */
	case 1 : /* 4 bytes */
		to->u8[0] = from->u8[0];
		return dest;
	case 2 : /* 6 bytes */
		to->u16[0] = from->u16[0];
		return dest;
	case 4 : /* 4 bytes */
		to->u32[0] = from->u32[0];
		return dest;

	/* Double-register moves; these are probably still a win */
	case 3 : /* 12 bytes */
		to->u16[0] = from->u16[0];
		to->u8[2] = from->u8[2];
		return dest;
	case 5 : /* 10 bytes */
		to->u32[0] = from->u32[0];
		to->u8[4] = from->u8[4];
		return dest;
	case 6 : /* 12 bytes */
		to->u32[0] = from->u32[0];
		to->u16[2] = from->u16[2];
		return dest;
	case 8 : /* 10 bytes */
		to->u32[0] = from->u32[0];
		to->u32[1] = from->u32[1];
		return dest;

	default :
		break;
	}

	/* Even if we have to load up esi and edi ready for a string
	 * operation, we can sometimes save space by using multiple
	 * single-byte "movs" operations instead of loading up ecx and
	 * using "rep movsb".
	 *
	 * "load ecx, rep movsb" is 7 bytes, plus an average of 1 byte
	 * to allow for saving/restoring ecx 50% of the time.
	 *
	 * "movsl" and "movsb" are 1 byte each, "movsw" is two bytes.
	 * (In 16-bit mode, "movsl" is 2 bytes and "movsw" is 1 byte,
	 * but "movsl" moves twice as much data, so it balances out).
	 *
	 * The cutoff point therefore occurs around 26 bytes; the byte
	 * requirements for each method are:
	 *
	 * len             16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31
	 * #bytes (ecx)     8  8  8  8  8  8  8  8  8  8  8  8  8  8  8  8
	 * #bytes (no ecx)  4  5  6  7  5  6  7  8  6  7  8  9  7  8  9 10
	 */
	if ( len >= 26 )
		return __memcpy ( dest, src, len );

	/*
	 * Below the cutoff: one "movsl" per whole dword.  len is less
	 * than 26 here, so len / 4 is at most 6; each case falls through
	 * to the next so that exactly len / 4 instructions are issued.
	 */
	switch ( len / 4 ) {
	case 6 :
		__asm__ __volatile__ ( "movsl"
				       : "=&D" ( dest_pos ), "=&S" ( src_pos )
				       : "0" ( dest_pos ), "1" ( src_pos )
				       : "memory" );
		/* fall through */
	case 5 :
		__asm__ __volatile__ ( "movsl"
				       : "=&D" ( dest_pos ), "=&S" ( src_pos )
				       : "0" ( dest_pos ), "1" ( src_pos )
				       : "memory" );
		/* fall through */
	case 4 :
		__asm__ __volatile__ ( "movsl"
				       : "=&D" ( dest_pos ), "=&S" ( src_pos )
				       : "0" ( dest_pos ), "1" ( src_pos )
				       : "memory" );
		/* fall through */
	case 3 :
		__asm__ __volatile__ ( "movsl"
				       : "=&D" ( dest_pos ), "=&S" ( src_pos )
				       : "0" ( dest_pos ), "1" ( src_pos )
				       : "memory" );
		/* fall through */
	case 2 :
		__asm__ __volatile__ ( "movsl"
				       : "=&D" ( dest_pos ), "=&S" ( src_pos )
				       : "0" ( dest_pos ), "1" ( src_pos )
				       : "memory" );
		/* fall through */
	case 1 :
		__asm__ __volatile__ ( "movsl"
				       : "=&D" ( dest_pos ), "=&S" ( src_pos )
				       : "0" ( dest_pos ), "1" ( src_pos )
				       : "memory" );
		/* fall through */
	default :
		break;
	}

	/* Trailing word: present when the remainder modulo 4 is 2 or 3 */
	if ( len & 2 )
		__asm__ __volatile__ ( "movsw"
				       : "=&D" ( dest_pos ), "=&S" ( src_pos )
				       : "0" ( dest_pos ), "1" ( src_pos )
				       : "memory" );

	/* Trailing byte: present when the length is odd */
	if ( len & 1 )
		__asm__ __volatile__ ( "movsb"
				       : "=&D" ( dest_pos ), "=&S" ( src_pos )
				       : "0" ( dest_pos ), "1" ( src_pos )
				       : "memory" );

	return dest;
}
00152 
/*
 * memcpy() front end: a length that the compiler can prove constant is
 * routed to the inline __constant_memcpy(); any other length goes to
 * __memcpy().
 */
#define memcpy( dest, src, len )				\
	( __builtin_constant_p ( (len) )			\
	  ? __constant_memcpy ( (dest), (src), (len) )		\
	  : __memcpy ( (dest), (src), (len) ) )
00157 
#define __HAVE_ARCH_MEMMOVE
/*
 * Copy n bytes from src to dest, choosing the copy direction from the
 * relative position of the two pointers.
 *
 * dest: destination buffer
 * src:  source buffer
 * n:    number of bytes to copy
 *
 * Returns dest.
 */
static inline void * memmove(void * dest,const void * src, size_t n)
{
	/* Scratch outputs: tell the compiler ecx/esi/edi are modified */
	int count, from, to;

	if (dest >= src) {
		/*
		 * Destination is at or above the source: set the direction
		 * flag and copy from the last byte downwards, then clear
		 * the direction flag again.
		 */
		__asm__ __volatile__(
			"std\n\trep\n\tmovsb\n\tcld"
			: "=&c" (count), "=&S" (from), "=&D" (to)
			: "0" (n),
			  "1" ((const char *)src + (n - 1)),
			  "2" ((char *)dest + (n - 1))
			: "memory");
		return dest;
	}

	/* Destination is below the source: plain ascending copy */
	__asm__ __volatile__(
		"cld\n\trep\n\tmovsb"
		: "=&c" (count), "=&S" (from), "=&D" (to)
		: "0" (n), "1" (src), "2" (dest)
		: "memory");
	return dest;
}
00183 
#define __HAVE_ARCH_MEMSET
/*
 * Fill count bytes starting at s with the low byte of c.
 *
 * s:     buffer to fill
 * c:     fill value (only %al, the low byte, is stored by "stosb")
 * count: number of bytes to write
 *
 * Returns s.
 */
static inline void * memset(void *s, int c,size_t count)
{
	/* Scratch outputs: tell the compiler ecx/edi are modified */
	int remaining, cursor;

	__asm__ __volatile__("cld\n\trep\n\tstosb"
		: "=&c" (remaining), "=&D" (cursor)
		: "0" (count), "1" (s), "a" (c)
		: "memory");

	return s;
}
00197 
#define __HAVE_ARCH_MEMSWAP
/*
 * Exchange the contents of two memory regions, byte by byte.
 *
 * dest: first region
 * src:  second region
 * n:    number of bytes to exchange
 *
 * Returns dest.  A zero length leaves both regions untouched.
 */
static inline void * memswap(void *dest, void *src, size_t n)
{
	/* Scratch outputs: tell the compiler these registers are modified */
	size_t count;
	void *src_pos;
	void *dest_pos;
	int scratch;

	/*
	 * The assembly loop below runs its body once before "loop"
	 * decrements and tests the counter, so entering it with a count
	 * of zero would wrap the counter and swap far more than was asked
	 * for.  Handle the empty case here instead.
	 */
	if (n == 0)
		return dest;

	/*
	 * The pointer operands are referenced as %1 / %2 rather than by
	 * hard-coded register name so that the addressing width always
	 * matches the pointer width.
	 */
	__asm__ __volatile__(
		"\n1:\t"
		"movb (%2),%%al\n\t"	/* al = *dest */
		"xchgb (%1),%%al\n\t"	/* swap al with *src */
		"inc %1\n\t"		/* src++ */
		"stosb\n\t"		/* *dest++ = al */
		"loop 1b"		/* repeat while --count != 0 */
		: "=&c" (count), "=&S" (src_pos), "=&D" (dest_pos),
		  "=&a" (scratch)
		: "0" (n), "1" (src), "2" (dest)
		: "memory" );
	return dest;
}
00214 
#define __HAVE_ARCH_STRNCMP
/*
 * Compare at most count characters of two NUL-terminated strings.
 *
 * cs:    first string
 * ct:    second string
 * count: maximum number of characters to compare
 *
 * Returns 0 if the compared characters are equal, -1 if the first
 * differing character of cs is below that of ct (compared as unsigned
 * bytes), and 1 if it is above.
 *
 * The counter is decremented and then tested for sign, so a count of
 * zero, or one whose decremented value has its top bit set, compares
 * as equal without reading either string.
 */
static inline int strncmp(const char * cs,const char * ct,size_t count)
{
	int res;
	/* Scratch outputs: tell the compiler esi/edi/ecx are modified */
	const char *cs_pos;
	const char *ct_pos;
	size_t remaining;

	/*
	 * The "memory" clobber is required: the string instructions read
	 * both strings behind the compiler's back, so any pending stores
	 * to them must be completed before this statement.
	 */
	__asm__ __volatile__(
		"1:\tdec %3\n\t"		/* out of characters? */
		"js 2f\n\t"
		"lodsb\n\t"			/* al = *cs++ */
		"scasb\n\t"			/* compare al with *ct++ */
		"jne 3f\n\t"
		"testb %%al,%%al\n\t"		/* stop at the terminator */
		"jne 1b\n"
		"2:\txorl %%eax,%%eax\n\t"	/* equal: result 0 */
		"jmp 4f\n"
		"3:\tsbbl %%eax,%%eax\n\t"	/* -1 if borrow, else 0 */
		"orb $1,%%al\n"			/* ... turned into -1 / 1 */
		"4:"
		: "=a" (res), "=&S" (cs_pos), "=&D" (ct_pos),
		  "=&c" (remaining)
		: "1" (cs), "2" (ct), "3" (count)
		: "memory");
	return res;
}
00237 
#define __HAVE_ARCH_STRLEN
/*
 * Return the length of a NUL-terminated string, not counting the
 * terminator.
 *
 * s: string to measure
 */
static inline size_t strlen(const char * s)
{
	/* Scratch output: tells the compiler edi is modified */
	const char *scan;
	size_t remaining;

	/*
	 * "repne scasb" scans for the zero byte in %al, decrementing the
	 * counter once per byte examined (terminator included).  The
	 * counter starts at all-ones, i.e. effectively unlimited.
	 *
	 * The "memory" clobber is required: the instruction reads the
	 * string behind the compiler's back, so any pending stores to it
	 * must be completed before this statement.
	 */
	__asm__ __volatile__(
		"repne\n\t"
		"scasb"
		: "=c" (remaining), "=&D" (scan)
		: "1" (s), "a" (0), "0" (~(size_t)0)
		: "memory");

	/*
	 * After the scan, remaining == all-ones - (length + 1), so
	 * inverting it gives length + 1.
	 */
	return ~remaining - 1;
}
00251 
00252 #endif /* ETHERBOOT_BITS_STRING_H */

Generated on Tue Apr 6 20:01:09 2010 for gPXE by  doxygen 1.5.7.1