uri.c File Reference

Uniform Resource Identifiers. More...

#include <stdint.h>
#include <stdlib.h>
#include <string.h>
#include <libgen.h>
#include <ctype.h>
#include <gpxe/vsprintf.h>
#include <gpxe/uri.h>

Go to the source code of this file.

Functions

 FILE_LICENCE (GPL2_OR_LATER)
static void dump_uri (struct uri *uri)
 Dump URI for debugging.
struct uri * parse_uri (const char *uri_string)
 Parse URI.
unsigned int uri_port (struct uri *uri, unsigned int default_port)
 Get port from URI.
int unparse_uri (char *buf, size_t size, struct uri *uri, unsigned int fields)
 Unparse URI.
struct uri * uri_dup (struct uri *uri)
 Duplicate URI.
char * resolve_path (const char *base_path, const char *relative_path)
 Resolve base+relative path.
struct uri * resolve_uri (struct uri *base_uri, struct uri *relative_uri)
 Resolve base+relative URI.
static int is_unreserved_uri_char (int c, int field)
 Test for unreserved URI characters.
size_t uri_encode (const char *raw_string, char *buf, ssize_t len, int field)
 URI-encode string.
size_t uri_decode (const char *encoded_string, char *buf, ssize_t len)
 Decode URI-encoded string.


Detailed Description

Uniform Resource Identifiers.

Definition in file uri.c.


Function Documentation

FILE_LICENCE ( GPL2_OR_LATER   ) 

static void dump_uri ( struct uri *  uri  )  [static]

Dump URI for debugging.

Parameters:
uri URI

Definition at line 40 of file uri.c.

References DBG, uri::fragment, uri::host, uri::opaque, uri::password, uri::path, uri::port, uri::query, uri::scheme, and uri::user.

Referenced by parse_uri(), and unparse_uri().

00040                                          {
00041         if ( ! uri )
00042                 return;
00043         if ( uri->scheme )
00044                 DBG ( " scheme \"%s\"", uri->scheme );
00045         if ( uri->opaque )
00046                 DBG ( " opaque \"%s\"", uri->opaque );
00047         if ( uri->user )
00048                 DBG ( " user \"%s\"", uri->user );
00049         if ( uri->password )
00050                 DBG ( " password \"%s\"", uri->password );
00051         if ( uri->host )
00052                 DBG ( " host \"%s\"", uri->host );
00053         if ( uri->port )
00054                 DBG ( " port \"%s\"", uri->port );
00055         if ( uri->path )
00056                 DBG ( " path \"%s\"", uri->path );
00057         if ( uri->query )
00058                 DBG ( " query \"%s\"", uri->query );
00059         if ( uri->fragment )
00060                 DBG ( " fragment \"%s\"", uri->fragment );
00061 }

struct uri* parse_uri ( const char *  uri_string  )  [read]

Parse URI.

Parameters:
uri_string URI as a string
Return values:
uri URI
Splits a URI into its component parts. The returned URI structure is dynamically allocated and must eventually be freed by calling uri_put().

Definition at line 73 of file uri.c.

References DBG, dump_uri(), uri::fragment, uri::host, memcpy, memmove(), NULL, uri::opaque, uri::password, uri::path, uri::port, uri::query, raw(), uri::scheme, strchr(), strlen(), strncmp(), uri_decode(), URI_ENCODED, URI_FIRST_FIELD, uri_get_field, URI_LAST_FIELD, uri::user, and zalloc().

Referenced by boot_next_server_and_filename(), imgfetch(), test_parse_unparse(), test_resolve(), tftp_apply_settings(), uri_dup(), and xfer_open_uri_string().

00073                                                   {
00074         struct uri *uri;
00075         char *raw;
00076         char *tmp;
00077         char *path = NULL;
00078         char *authority = NULL;
00079         int i;
00080         size_t raw_len;
00081 
00082         /* Allocate space for URI struct and a copy of the string */
00083         raw_len = ( strlen ( uri_string ) + 1 /* NUL */ );
00084         uri = zalloc ( sizeof ( *uri ) + raw_len );
00085         if ( ! uri )
00086                 return NULL;
00087         raw = ( ( ( char * ) uri ) + sizeof ( *uri ) );
00088 
00089         /* Copy in the raw string */
00090         memcpy ( raw, uri_string, raw_len );
00091 
00092         /* Start by chopping off the fragment, if it exists */
00093         if ( ( tmp = strchr ( raw, '#' ) ) ) {
00094                 *(tmp++) = '\0';
00095                 uri->fragment = tmp;
00096         }
00097 
00098         /* Identify absolute/relative URI.  We ignore schemes that are
00099          * apparently only a single character long, since otherwise we
00100          * misinterpret a DOS-style path name ("C:\path\to\file") as a
00101          * URI with scheme="C",opaque="\path\to\file".
00102          */
00103         if ( ( tmp = strchr ( raw, ':' ) ) && ( tmp > ( raw + 1 ) ) ) {
00104                 /* Absolute URI: identify hierarchical/opaque */
00105                 uri->scheme = raw;
00106                 *(tmp++) = '\0';
00107                 if ( *tmp == '/' ) {
00108                         /* Absolute URI with hierarchical part */
00109                         path = tmp;
00110                 } else {
00111                         /* Absolute URI with opaque part */
00112                         uri->opaque = tmp;
00113                 }
00114         } else {
00115                 /* Relative URI */
00116                 path = raw;
00117         }
00118 
00119         /* If we don't have a path (i.e. we have an absolute URI with
00120          * an opaque portion, we're already finished processing
00121          */
00122         if ( ! path )
00123                 goto done;
00124 
00125         /* Chop off the query, if it exists */
00126         if ( ( tmp = strchr ( path, '?' ) ) ) {
00127                 *(tmp++) = '\0';
00128                 uri->query = tmp;
00129         }
00130 
00131         /* Identify net/absolute/relative path */
00132         if ( strncmp ( path, "//", 2 ) == 0 ) {
00133                 /* Net path.  If this is terminated by the first '/'
00134                  * of an absolute path, then we have no space for a
00135                  * terminator after the authority field, so shuffle
00136                  * the authority down by one byte, overwriting one of
00137                  * the two slashes.
00138                  */
00139                 authority = ( path + 2 );
00140                 if ( ( tmp = strchr ( authority, '/' ) ) ) {
00141                         /* Shuffle down */
00142                         uri->path = tmp;
00143                         memmove ( ( authority - 1 ), authority,
00144                                   ( tmp - authority ) );
00145                         authority--;
00146                         *(--tmp) = '\0';
00147                 }
00148         } else {
00149                 /* Absolute/relative path */
00150                 uri->path = path;
00151         }
00152 
00153         /* Split authority into user[:password] and host[:port] portions */
00154         if ( ( tmp = strchr ( authority, '@' ) ) ) {
00155                 /* Has user[:password] */
00156                 *(tmp++) = '\0';
00157                 uri->host = tmp;
00158                 uri->user = authority;
00159                 if ( ( tmp = strchr ( authority, ':' ) ) ) {
00160                         /* Has password */
00161                         *(tmp++) = '\0';
00162                         uri->password = tmp;
00163                 }
00164         } else {
00165                 /* No user:password */
00166                 uri->host = authority;
00167         }
00168 
00169         /* Split host into host[:port] */
00170         if ( ( tmp = strchr ( uri->host, ':' ) ) ) {
00171                 *(tmp++) = '\0';
00172                 uri->port = tmp;
00173         }
00174 
00175         /* Decode fields that should be decoded */
00176         for ( i = URI_FIRST_FIELD; i <= URI_LAST_FIELD; i++ ) {
00177                 const char *field = uri_get_field ( uri, i );
00178                 if ( field && ( URI_ENCODED & ( 1 << i ) ) )
00179                         uri_decode ( field, ( char * ) field,
00180                                      strlen ( field ) + 1 /* NUL */ );
00181         }
00182 
00183  done:
00184         DBG ( "URI \"%s\" split into", uri_string );
00185         dump_uri ( uri );
00186         DBG ( "\n" );
00187 
00188         return uri;
00189 }

unsigned int uri_port ( struct uri *  uri,
unsigned int  default_port 
)

Get port from URI.

Parameters:
uri URI, or NULL
default_port Default port to use if none specified in URI
Return values:
port Port

Definition at line 198 of file uri.c.

References NULL, uri::port, and strtoul().

Referenced by ftp_open(), http_open_filter(), slam_open(), tcp_open_uri(), tftp_core_open(), and udp_open_uri().

00198                                                                      {
00199         if ( ( ! uri ) || ( ! uri->port ) )
00200                 return default_port;
00201         return ( strtoul ( uri->port, NULL, 0 ) );
00202 }

int unparse_uri ( char *  buf,
size_t  size,
struct uri *  uri,
unsigned int  fields 
)

Unparse URI.

Parameters:
buf Buffer to fill with URI string
size Size of buffer
uri URI to write into buffer, or NULL
fields Bitmask of fields to include in URI string, or URI_ALL
Return values:
len Length of URI string

Definition at line 213 of file uri.c.

References DBG, dump_uri(), ssnprintf(), uri_encode(), URI_ENCODED, URI_FIRST_FIELD, uri_get_field, URI_LAST_FIELD, URI_OPAQUE, and URI_SCHEME_BIT.

Referenced by http_step(), imgfetch(), test_parse_unparse(), test_resolve(), and uri_dup().

00214                                         {
00215         /* List of characters that typically go before certain fields */
00216         static char separators[] = { /* scheme */ 0, /* opaque */ ':',
00217                                      /* user */ 0, /* password */ ':',
00218                                      /* host */ '@', /* port */ ':',
00219                                      /* path */ 0, /* query */ '?',
00220                                      /* fragment */ '#' };
00221         int used = 0;
00222         int i;
00223 
00224         DBG ( "URI unparsing" );
00225         dump_uri ( uri );
00226         DBG ( "\n" );
00227 
00228         /* Ensure buffer is NUL-terminated */
00229         if ( size )
00230                 buf[0] = '\0';
00231 
00232         /* Special-case NULL URI */
00233         if ( ! uri )
00234                 return 0;
00235 
00236         /* Iterate through requested fields */
00237         for ( i = URI_FIRST_FIELD; i <= URI_LAST_FIELD; i++ ) {
00238                 const char *field = uri_get_field ( uri, i );
00239                 char sep = separators[i];
00240 
00241                 /* Ensure `fields' only contains bits for fields that exist */
00242                 if ( ! field )
00243                         fields &= ~( 1 << i );
00244 
00245                 /* Store this field if we were asked to */
00246                 if ( fields & ( 1 << i ) ) {
00247                         /* Print :// if we're non-opaque and had a scheme */
00248                         if ( ( fields & URI_SCHEME_BIT ) &&
00249                              ( i > URI_OPAQUE ) ) {
00250                                 used += ssnprintf ( buf + used, size - used,
00251                                                     "://" );
00252                                 /* Only print :// once */
00253                                 fields &= ~URI_SCHEME_BIT;
00254                         }
00255 
00256                         /* Only print separator if an earlier field exists */
00257                         if ( sep && ( fields & ( ( 1 << i ) - 1 ) ) )
00258                                 used += ssnprintf ( buf + used, size - used,
00259                                                     "%c", sep );
00260 
00261                         /* Print contents of field, possibly encoded */
00262                         if ( URI_ENCODED & ( 1 << i ) )
00263                                 used += uri_encode ( field, buf + used,
00264                                                      size - used, i );
00265                         else
00266                                 used += ssnprintf ( buf + used, size - used,
00267                                                     "%s", field );
00268                 }
00269         }
00270 
00271         return used;
00272 }

struct uri* uri_dup ( struct uri *  uri  )  [read]

Duplicate URI.

Parameters:
uri URI
Return values:
uri Duplicate URI
Creates a modifiable copy of a URI.

Definition at line 282 of file uri.c.

References NULL, parse_uri(), unparse_uri(), and URI_ALL.

Referenced by resolve_uri().

00282                                          {
00283         size_t len = ( unparse_uri ( NULL, 0, uri, URI_ALL ) + 1 );
00284         char buf[len];
00285 
00286         unparse_uri ( buf, len, uri, URI_ALL );
00287         return parse_uri ( buf );
00288 }

char* resolve_path ( const char *  base_path,
const char *  relative_path 
)

Resolve base+relative path.

Parameters:
base_path Base path
relative_path Relative path
Return values:
resolved_path Resolved path
Takes a base path (e.g. "/var/lib/tftpboot/vmlinuz") and a relative path (e.g. "initrd.gz") and produces a new path (e.g. "/var/lib/tftpboot/initrd.gz"). Note that any non-directory portion of the base path will automatically be stripped; this matches the semantics used when resolving the path component of URIs.

Definition at line 304 of file uri.c.

References asprintf(), dirname(), memcpy, NULL, strdup(), and strlen().

Referenced by resolve_uri().

00305                                                   {
00306         size_t base_len = ( strlen ( base_path ) + 1 );
00307         char base_path_copy[base_len];
00308         char *base_tmp = base_path_copy;
00309         char *resolved;
00310 
00311         /* If relative path is absolute, just re-use it */
00312         if ( relative_path[0] == '/' )
00313                 return strdup ( relative_path );
00314 
00315         /* Create modifiable copy of path for dirname() */
00316         memcpy ( base_tmp, base_path, base_len );
00317         base_tmp = dirname ( base_tmp );
00318 
00319         /* Process "./" and "../" elements */
00320         while ( *relative_path == '.' ) {
00321                 relative_path++;
00322                 if ( *relative_path == 0 ) {
00323                         /* Do nothing */
00324                 } else if ( *relative_path == '/' ) {
00325                         relative_path++;
00326                 } else if ( *relative_path == '.' ) {
00327                         relative_path++;
00328                         if ( *relative_path == 0 ) {
00329                                 base_tmp = dirname ( base_tmp );
00330                         } else if ( *relative_path == '/' ) {
00331                                 base_tmp = dirname ( base_tmp );
00332                                 relative_path++;
00333                         } else {
00334                                 relative_path -= 2;
00335                                 break;
00336                         }
00337                 } else {
00338                         relative_path--;
00339                         break;
00340                 }
00341         }
00342 
00343         /* Create and return new path */
00344         if ( asprintf ( &resolved, "%s%s%s", base_tmp,
00345                         ( ( base_tmp[ strlen ( base_tmp ) - 1 ] == '/' ) ?
00346                           "" : "/" ), relative_path ) < 0 )
00347                 return NULL;
00348 
00349         return resolved;
00350 }

struct uri* resolve_uri ( struct uri *  base_uri,
struct uri *  relative_uri 
) [read]

Resolve base+relative URI.

Parameters:
base_uri Base URI, or NULL
relative_uri Relative URI
Return values:
resolved_uri Resolved URI
Takes a base URI (e.g. "http://etherboot.org/kernels/vmlinuz") and a relative URI (e.g. "../initrds/initrd.gz") and produces a new URI (e.g. "http://etherboot.org/initrds/initrd.gz").

Definition at line 363 of file uri.c.

References uri::fragment, free(), memcpy, NULL, uri::path, uri::query, resolve_path(), uri_dup(), and uri_is_absolute().

Referenced by churi(), test_resolve(), and xfer_open_uri().

00364                                                       {
00365         struct uri tmp_uri;
00366         char *tmp_path = NULL;
00367         struct uri *new_uri;
00368 
00369         /* If relative URI is absolute, just re-use it */
00370         if ( uri_is_absolute ( relative_uri ) || ( ! base_uri ) )
00371                 return uri_get ( relative_uri );
00372 
00373         /* Mangle URI */
00374         memcpy ( &tmp_uri, base_uri, sizeof ( tmp_uri ) );
00375         if ( relative_uri->path ) {
00376                 tmp_path = resolve_path ( ( base_uri->path ?
00377                                             base_uri->path : "/" ),
00378                                           relative_uri->path );
00379                 tmp_uri.path = tmp_path;
00380                 tmp_uri.query = relative_uri->query;
00381                 tmp_uri.fragment = relative_uri->fragment;
00382         } else if ( relative_uri->query ) {
00383                 tmp_uri.query = relative_uri->query;
00384                 tmp_uri.fragment = relative_uri->fragment;
00385         } else if ( relative_uri->fragment ) {
00386                 tmp_uri.fragment = relative_uri->fragment;
00387         }
00388 
00389         /* Create demangled URI */
00390         new_uri = uri_dup ( &tmp_uri );
00391         free ( tmp_path );
00392         return new_uri;
00393 }

static int is_unreserved_uri_char ( int  c,
int  field 
) [static]

Test for unreserved URI characters.

Parameters:
c Character to test
field Field of URI in which character lies
Return values:
is_unreserved Character is an unreserved character

Definition at line 402 of file uri.c.

References isdigit, islower, isupper, URI_OPAQUE, URI_PATH, and URI_QUERY.

Referenced by uri_encode().

00402                                                        {
00403         /* According to RFC3986, the unreserved character set is
00404          *
00405          * A-Z a-z 0-9 - _ . ~
00406          *
00407          * but we also pass & ; = in queries, / in paths,
00408          * and everything in opaques
00409          */
00410         int ok = ( isupper ( c ) || islower ( c ) || isdigit ( c ) ||
00411                     ( c == '-' ) || ( c == '_' ) ||
00412                     ( c == '.' ) || ( c == '~' ) );
00413 
00414         if ( field == URI_QUERY )
00415                 ok = ok || ( c == ';' ) || ( c == '&' ) || ( c == '=' );
00416 
00417         if ( field == URI_PATH )
00418                 ok = ok || ( c == '/' );
00419 
00420         if ( field == URI_OPAQUE )
00421                 ok = 1;
00422 
00423         return ok;
00424 }

size_t uri_encode ( const char *  raw_string,
char *  buf,
ssize_t  len,
int  field 
)

URI-encode string.

Parameters:
raw_string String to be URI-encoded
buf Buffer to contain encoded string
len Length of buffer
field Field of URI in which string lies
Return values:
len Length of encoded string (excluding NUL)

Definition at line 435 of file uri.c.

References is_unreserved_uri_char(), and ssnprintf().

Referenced by boot_next_server_and_filename(), fetchf_uristring(), and unparse_uri().

00436                                 {
00437         ssize_t remaining = len;
00438         size_t used;
00439         unsigned char c;
00440 
00441         if ( len > 0 )
00442                 buf[0] = '\0';
00443 
00444         while ( ( c = *(raw_string++) ) ) {
00445                 if ( is_unreserved_uri_char ( c, field ) ) {
00446                         used = ssnprintf ( buf, remaining, "%c", c );
00447                 } else {
00448                         used = ssnprintf ( buf, remaining, "%%%02X", c );
00449                 }
00450                 buf += used;
00451                 remaining -= used;
00452         }
00453 
00454         return ( len - remaining );
00455 }

size_t uri_decode ( const char *  encoded_string,
char *  buf,
ssize_t  len 
)

Decode URI-encoded string.

Parameters:
encoded_string URI-encoded string
buf Buffer to contain decoded string
len Length of buffer
Return values:
len Length of decoded string (excluding NUL)
This function may be used in-place, with buf the same as encoded_string.

Definition at line 468 of file uri.c.

References snprintf(), and strtoul().

Referenced by parse_uri(), and storef_uristring().

00468                                                                          {
00469         ssize_t remaining;
00470         char hexbuf[3];
00471         char *hexbuf_end;
00472         unsigned char c;
00473 
00474         for ( remaining = len; *encoded_string; remaining-- ) {
00475                 if ( *encoded_string == '%' ) {
00476                         encoded_string++;
00477                         snprintf ( hexbuf, sizeof ( hexbuf ), "%s",
00478                                    encoded_string );
00479                         c = strtoul ( hexbuf, &hexbuf_end, 16 );
00480                         encoded_string += ( hexbuf_end - hexbuf );
00481                 } else {
00482                         c = *(encoded_string++);
00483                 }
00484                 if ( remaining > 1 )
00485                         *buf++ = c;
00486         }
00487 
00488         if ( len )
00489                 *buf = 0;
00490 
00491         return ( len - remaining );
00492 }


Generated on Tue Apr 6 20:01:15 2010 for gPXE by  doxygen 1.5.7.1