/*
* This file is modified by Ivan Maidanski
* A URI instance represents that defined by
* RFC3986,
* with some deviations.
*
* At its highest level, a URI consists of:
* [scheme:]scheme-specific-part
* [#fragment]
*
* where # and : are literal characters,
* and those parts enclosed in square brackets are optional.
*
* There are two main types of URI. An opaque URI is one
* which just consists of the above three parts, and is not further
* defined. An example of such a URI would be a mailto: URI.
* In contrast, hierarchical URIs give further definition
* to the scheme-specific part, so as to represent some part of a
* hierarchical structure. Their scheme-specific part has the form:
*
* [//authority][path]
* [?query]
*
* with / and ? being literal characters.
* When server-based, the authority section is further subdivided into:
*
* [user-info@]host
* [:port]
*
* with @ and : as literal characters.
* Authority sections that are not server-based are said to be
* registry-based.
*
* Hierarchical URIs can be either relative or absolute. Absolute URIs
* always specify a scheme, while relative URIs don't
* specify a scheme. Opaque URIs are always absolute.
*
* Each part of the URI may have one of three states: undefined, empty
* or containing some content. The former two of these are represented
* by null and the empty string in Java, respectively.
* The scheme-specific part may never be undefined. It also follows from
* this that the path sub-part may also not be undefined, so as to ensure
* the former.
*
* The characters that can be used within a valid URI are restricted.
* There are two main classes of characters which can't be used as is
* within the URI (the enumeration of these classes is missing from
* this comment).
*
* The set of valid characters differs depending on the section of the
* URI, and those definitions reference several named sets of characters
* (both enumerations are likewise missing from this comment).
*
* The constructors and accessor methods allow the use and retrieval of
* URI components which contain non-US-ASCII characters directly.
* They are only escaped when the toASCIIString() method
* is used. In contrast, illegal characters are always quoted, with the
* exception of the return values of the non-raw accessors.
*
* Returns the string content of the specified group of the supplied
* matcher. The returned value is modified according to the following:
* if the group is unmatched or empty and the preceding group is also
* unmatched or empty, null is returned to indicate an undefined
* value. Otherwise, the value is truly the empty string and this is
* the returned value.
*
* This method is used for matching against all parts of the URI
* that may be either undefined or empty (i.e. all those but the
* scheme-specific part and the path). In each case, the preceding
* group is the content of the original group, along with some
* additional distinguishing feature. For example, the preceding
* group for the query includes the preceding question mark,
* while that of the fragment includes the hash symbol. The presence
* of these features enables disambiguation between the two cases
* of a completely unspecified (undefined) value and a specified
* but empty value.
* The scheme differs in that it will never return an empty string;
* the delimiter follows the scheme rather than preceding it, so
* it becomes part of the following section. The same is true
* of the user information.
*
* @param match the matcher, which contains the results of the URI
* matched against the URI regular expression.
* @param group the index of the group to retrieve; the group with
* index (group - 1) is consulted as the preceding group.
* @return either the matched content, null for undefined
* values, or an empty string for a URI part with empty content.
*/
/**
 * Extracts one optional URI component from a completed regular-expression
 * match, distinguishing an undefined component from an empty one.
 *
 * @param match the matcher holding the result of a successful match
 * @param group index of the capturing group with the component content;
 *        the group at index {@code group - 1} is expected to enclose it
 *        together with its delimiter
 * @return the non-empty content of {@code group}; otherwise the empty
 *         string when the enclosing group captured something, or
 *         {@code null} when the enclosing group is unmatched or empty too
 */
private static String getURIGroup(Matcher match, int group)
{
  String content = match.group(group);
  if (content != null && content.length() != 0)
    return content;

  // Nothing captured for the component itself: the enclosing group tells
  // whether the delimiter was present (empty component) or not (undefined).
  String enclosing = match.group(group - 1);
  boolean delimiterSeen = enclosing != null && enclosing.length() != 0;
  return delimiterSeen ? "" : null;
}
/**
 * Populates the component fields of this URI from its string form.
 *
 * The string is matched against the (lazily compiled) URI pattern; the
 * raw scheme, scheme-specific part and fragment are always taken from
 * the match, while the raw authority, path and query are only taken for
 * non-opaque URIs. The authority is then split by
 * {@code parseServerAuthority()} and every raw component is unquoted.
 *
 * @param str The string to parse
 *
 * @exception URISyntaxException If the string does not match the URI
 * pattern, its authority cannot be parsed, or a component contains an
 * invalid escape sequence
 */
private void parseURI(String str) throws URISyntaxException
{
  if (URI_PATTERN == null)
    URI_PATTERN = Pattern.compile(URI_REGEXP);

  Matcher parts = URI_PATTERN.matcher(str);
  if (!parts.matches())
    throw new URISyntaxException(str,
                                 "doesn't match URI regular expression");

  scheme = getURIGroup(parts, SCHEME_GROUP);
  rawSchemeSpecificPart = parts.group(SCHEME_SPEC_PART_GROUP);
  schemeSpecificPart = unquote(rawSchemeSpecificPart);
  // isOpaque() is consulted only after the scheme and scheme-specific
  // part have been stored.
  if (!isOpaque())
    {
      rawAuthority = getURIGroup(parts, AUTHORITY_GROUP);
      rawPath = parts.group(PATH_GROUP);
      rawQuery = getURIGroup(parts, QUERY_GROUP);
    }
  rawFragment = getURIGroup(parts, FRAGMENT_GROUP);

  parseServerAuthority();

  // Unquote everything now: this is the only point at which an invalid
  // escape sequence can still be reported through an exception.
  authority = unquote(rawAuthority);
  userInfo = unquote(rawUserInfo);
  host = unquote(rawHost);
  path = unquote(rawPath);
  query = unquote(rawQuery);
  fragment = unquote(rawFragment);
}
/**
 * Decodes the "%" + two-hex-digit escapes of a URI component.
 *
 * Escaped octets and literal characters are collected as one UTF-8 byte
 * sequence which is then decoded into the result. Literal non-US-ASCII
 * characters are first encoded as UTF-8 (rather than being truncated to
 * a single byte), so components that contain such characters directly
 * survive unquoting unchanged.
 *
 * @param str The string to unquote or null.
 *
 * @return The unquoted string or null if str was null.
 *
 * @exception URISyntaxException If the given string contains invalid
 * escape sequences, i.e. a "%" that is not followed by two hexadecimal
 * digits.
 */
private static String unquote(String str) throws URISyntaxException
{
  if (str == null)
    return null;

  final int len = str.length();
  // Growable buffer: a literal non-ASCII character needs more than one
  // byte in UTF-8, so the output may be longer than the input.
  java.io.ByteArrayOutputStream octets =
    new java.io.ByteArrayOutputStream(len);
  try
    {
      int i = 0;
      while (i < len)
        {
          char c = str.charAt(i);
          if (c == '%')
            {
              if (i + 2 >= len)
                throw new URISyntaxException(str,
                                             "Invalid quoted character");
              int hi = Character.digit(str.charAt(i + 1), 16);
              int lo = Character.digit(str.charAt(i + 2), 16);
              if (lo < 0 || hi < 0)
                throw new URISyntaxException(str,
                                             "Invalid quoted character");
              octets.write(hi * 16 + lo);
              i += 3;
            }
          else if (c < 0x80)
            {
              octets.write(c);
              i++;
            }
          else
            {
              // Encode the whole run of non-ASCII characters at once so
              // that surrogate pairs are kept together.
              int end = i + 1;
              while (end < len && str.charAt(end) >= 0x80)
                end++;
              byte[] utf8 = str.substring(i, end).getBytes("utf-8");
              octets.write(utf8, 0, utf8.length);
              i = end;
            }
        }
      return octets.toString("utf-8");
    }
  catch (java.io.UnsupportedEncodingException x2)
    {
      throw (Error) new InternalError().initCause(x2);
    }
}
/**
 * Quotes the given string using the RFC3986_SSP character set as the
 * set of legal characters.
 *
 * Delegates to {@code quote(String, String)}, which replaces each
 * US-ASCII character outside the legal set by "%" followed by its
 * two-digit hex code.
 *
 * @param str The string to quote
 *
 * @return The quoted string.
 */
private static String quote(String str)
{
  final String legal = RFC3986_SSP;
  return quote(str, legal);
}
/**
 * Quotes the given authority string using the RFC3986_REG_NAME character
 * set as the set of legal characters.
 *
 * Delegates to {@code quote(String, String)}, which replaces each
 * US-ASCII character outside the legal set by "%" followed by its
 * two-digit hex code.
 *
 * @param str The string to quote
 *
 * @return The quoted string.
 */
private static String quoteAuthority(String str)
{
  // Technically, we should be using RFC2396_AUTHORITY, but
  // it contains no additional characters.
  final String legal = RFC3986_REG_NAME;
  return quote(str, legal);
}
/**
 * Percent-encodes every US-ASCII character of the supplied string that
 * is not a member of the given set of legal characters.
 *
 * Characters above code 127 are copied through unchanged, whether or
 * not they appear in the legal set.
 *
 * @param str the string to quote
 * @param legalCharacters the set of legal characters
 *
 * @return the quoted string.
 */
private static String quote(String str, String legalCharacters)
{
  final int len = str.length();
  StringBuilder quoted = new StringBuilder(len);
  for (int idx = 0; idx < len; idx++)
    {
      char ch = str.charAt(idx);
      if (legalCharacters.indexOf(ch) != -1 || ch > 127)
        {
          quoted.append(ch);
          continue;
        }
      // Illegal US-ASCII character: emit "%" and its two hex digits.
      quoted.append('%')
            .append(HEX.charAt(ch / 16))
            .append(HEX.charAt(ch % 16));
    }
  return quoted.toString();
}
/**
 * Quotes the given host string using the RFC3986_HOST character set as
 * the set of legal characters.
 *
 * Delegates to {@code quote(String, String)}, which replaces each
 * US-ASCII character outside the legal set by "%" followed by its
 * two-digit hex code.
 *
 * @param str The string to quote
 *
 * @return The quoted string.
 */
private static String quoteHost(String str)
{
  final String legal = RFC3986_HOST;
  return quote(str, legal);
}
/**
 * Quotes the given path string using the RFC3986_PATH_SEGMENTS character
 * set as the set of legal characters.
 *
 * Delegates to {@code quote(String, String)}, which replaces each
 * US-ASCII character outside the legal set by "%" followed by its
 * two-digit hex code.
 *
 * @param str The string to quote
 *
 * @return The quoted string.
 */
private static String quotePath(String str)
{
  // Technically, we should be using RFC2396_PATH, but
  // it contains no additional characters.
  final String legal = RFC3986_PATH_SEGMENTS;
  return quote(str, legal);
}
/**
 * Quotes the given user-information string using the RFC3986_USERINFO
 * character set as the set of legal characters.
 *
 * Delegates to {@code quote(String, String)}, which replaces each
 * US-ASCII character outside the legal set by "%" followed by its
 * two-digit hex code.
 *
 * @param str The string to quote
 *
 * @return The quoted string.
 */
private static String quoteUserInfo(String str)
{
  final String legal = RFC3986_USERINFO;
  return quote(str, legal);
}
/**
 * Creates a URI by parsing the given string.
 *
 * The string is stored as given and then handed to
 * {@code parseURI(String)}, which fills in the component fields.
 *
 * @param str The string to create the URI from
 *
 * @exception URISyntaxException If the given string cannot be parsed
 * by {@code parseURI(String)}
 * @exception NullPointerException If str is null
 */
public URI(String str) throws URISyntaxException
{
// Store the text before parsing: parseServerAuthority(), reached through
// parseURI(), reports this.string in the exceptions it throws.
this.string = str;
parseURI(str);
}
/**
 * Creates a URI from server-based components.
 *
 * The components are quoted individually, joined into a single string
 * (scheme ":", then "//" if any of user info, host or port is given,
 * user info "@", host, ":" port, path, "?" query, "#" fragment) and that
 * string is parsed by the single-string constructor. Components that
 * are null (or -1 for the port) are omitted.
 *
 * @param scheme The scheme name
 * @param userInfo The username and authorization info
 * @param host The hostname
 * @param port The port number, or -1 for none
 * @param path The path
 * @param query The query
 * @param fragment The fragment
 *
 * @exception URISyntaxException If the assembled string cannot be parsed
 */
public URI(String scheme, String userInfo, String host, int port,
           String path, String query, String fragment)
  throws URISyntaxException
{
  this((scheme != null ? scheme + ":" : "")
       + (userInfo != null || host != null || port != -1 ? "//" : "")
       + (userInfo != null ? quoteUserInfo(userInfo) + "@" : "")
       + (host != null ? quoteHost(host) : "")
       + (port != -1 ? ":" + port : "")
       + (path != null ? quotePath(path) : "")
       + (query != null ? "?" + quote(query) : "")
       + (fragment != null ? "#" + quote(fragment) : ""));
}
/**
 * Creates a URI from hierarchical components with an unparsed authority.
 *
 * The components are quoted individually, joined into a single string
 * (scheme ":", "//" authority, path, "?" query, "#" fragment) and that
 * string is parsed by the single-string constructor. Null components
 * are omitted.
 *
 * @param scheme The scheme name
 * @param authority The authority
 * @param path The path
 * @param query The query
 * @param fragment The fragment
 *
 * @exception URISyntaxException If the assembled string cannot be parsed
 */
public URI(String scheme, String authority, String path, String query,
           String fragment) throws URISyntaxException
{
  this((scheme != null ? scheme + ":" : "")
       + (authority != null ? "//" + quoteAuthority(authority) : "")
       + (path != null ? quotePath(path) : "")
       + (query != null ? "?" + quote(query) : "")
       + (fragment != null ? "#" + quote(fragment) : ""));
}
/**
 * Creates a URI from a scheme, host, path and fragment.
 *
 * Delegates to the seven-argument constructor with no user
 * information (null), no port (-1) and no query (null).
 *
 * @param scheme The scheme name
 * @param host The hostname
 * @param path The path
 * @param fragment The fragment
 *
 * @exception URISyntaxException If the seven-argument constructor
 * rejects the assembled URI
 */
public URI(String scheme, String host, String path, String fragment)
throws URISyntaxException
{
this(scheme, null, host, -1, path, null, fragment);
}
/**
 * Creates a URI from a scheme, a scheme-specific part and a fragment.
 *
 * The scheme-specific part and the fragment are quoted, the pieces are
 * joined into a single string (scheme ":", scheme-specific part,
 * "#" fragment) and that string is parsed by the single-string
 * constructor. Null components are omitted.
 *
 * @param scheme The scheme name
 * @param ssp The scheme specific part
 * @param fragment The fragment
 *
 * @exception URISyntaxException If the assembled string cannot be parsed
 */
public URI(String scheme, String ssp, String fragment)
  throws URISyntaxException
{
  this((scheme != null ? scheme + ":" : "")
       + (ssp != null ? quote(ssp) : "")
       + (fragment != null ? "#" + quote(fragment) : ""));
}
/**
 * Creates a URI from the given string, reporting syntax errors as an
 * unchecked exception.
 *
 * This is a convenience wrapper around the single-string constructor
 * for callers that know the string to be a valid URI.
 *
 * @param str The string to create the URI from
 *
 * @return the URI parsed from the string
 *
 * @exception IllegalArgumentException If the constructor rejects the
 * string; the exception carries the message of the underlying
 * URISyntaxException and has it as its cause
 * @exception NullPointerException If str is null
 */
public static URI create(String str)
{
  try
    {
      return new URI(str);
    }
  catch (URISyntaxException e)
    {
      // Propagate the parser's message so that the failure is
      // understandable without unwrapping the cause.
      throw new IllegalArgumentException(e.getMessage(), e);
    }
}
/**
 * Attempts to parse this URI's authority component, if defined,
 * into user-information, host, and port components. The purpose
 * of this method was to disambiguate between some authority sections
 * which form invalid server-based authorities but valid registry-based
 * authorities. In the updated RFC 3986, the authority section
 * is defined differently, with registry-based authorities part of
 * the host section. Thus, this method is now simply an explicit
 * way of parsing any authority section.
 *
 * When the raw authority is undefined nothing is parsed and this URI
 * is returned unchanged.
 *
 * @return this URI, with the authority section parsed into user
 * information, host and port components.
 * @throws URISyntaxException if the raw authority does not match the
 * authority pattern, or its port cannot be parsed as an integer
 */
public URI parseServerAuthority() throws URISyntaxException
{
  if (rawAuthority == null)
    return this;

  if (AUTHORITY_PATTERN == null)
    AUTHORITY_PATTERN = Pattern.compile(AUTHORITY_REGEXP);

  Matcher parts = AUTHORITY_PATTERN.matcher(rawAuthority);
  if (!parts.matches())
    throw new URISyntaxException(string,
                                 "doesn't match URI regular expression");

  rawUserInfo = getURIGroup(parts, AUTHORITY_USERINFO_GROUP);
  rawHost = getURIGroup(parts, AUTHORITY_HOST_GROUP);
  String portText = getURIGroup(parts, AUTHORITY_PORT_GROUP);
  if (portText == null)
    return this;

  // NOTE(review): an empty port string would also reach parseInt and be
  // rejected - confirm whether the authority pattern can produce one.
  try
    {
      port = Integer.parseInt(portText);
    }
  catch (NumberFormatException nfe)
    {
      URISyntaxException wrapped =
        new URISyntaxException(string,
                               "doesn't match URI regular expression");
      wrapped.initCause(nfe);
      throw wrapped;
    }
  return this;
}
/**
 * Returns a normalized version of the URI. If the URI is opaque, or its
 * path contains neither a "/./" nor a "/../" sequence, then this URI is
 * simply returned. Otherwise a new URI is built from this URI's scheme,
 * authority, query and fragment together with the path produced by
 * {@code normalizePath(String)}.
 *
 * @return the normalized URI.
 */
public URI normalize()
{
  if (isOpaque()
      || path.indexOf("/./") == -1 && path.indexOf("/../") == -1)
    return this;
  try
    {
      return new URI(scheme, authority, normalizePath(path), query,
                     fragment);
    }
  catch (URISyntaxException e)
    {
      throw (Error) new InternalError("Normalized URI variant could not "
                                      + "be constructed").initCause(e);
    }
}

/**
 * Normalizes the given path by removing its dot segments, following the
 * algorithm of section 5.2.4 of RFC 3986 without modifying the input.
 *
 * Only complete "." and ".." segments are treated as dot segments;
 * segments that merely begin with dots (for example ".hidden" or
 * "..data") are copied unchanged. A ".." segment removes the segment
 * that was output before it, if any.
 *
 * @param relativePath the relative path to be normalized.
 * @return the normalized path.
 */
private String normalizePath(String relativePath)
{
  final int len = relativePath.length();
  StringBuilder output = new StringBuilder(len);
  int pos = 0; // start of the not yet consumed input
  while (pos < len)
    {
      // A: drop a leading "../" or "./".
      if (relativePath.startsWith("../", pos))
        {
          pos += 3;
          continue;
        }
      if (relativePath.startsWith("./", pos))
        {
          pos += 2;
          continue;
        }
      // B: "/./" becomes "/"; so does "/." at the very end of the input.
      if (relativePath.startsWith("/./", pos))
        {
          pos += 2;
          continue;
        }
      if (pos + 2 == len && relativePath.startsWith("/.", pos))
        {
          output.append('/');
          break;
        }
      // C: "/../" becomes "/" and removes the last output segment; so
      // does "/.." at the very end of the input.
      if (relativePath.startsWith("/../", pos))
        {
          pos += 3;
          removeLastSegment(output);
          continue;
        }
      if (pos + 3 == len && relativePath.startsWith("/..", pos))
        {
          removeLastSegment(output);
          output.append('/');
          break;
        }
      // D: the remaining input is exactly "." or "..": discard it.
      if (pos + 1 == len && relativePath.charAt(pos) == '.')
        break;
      if (pos + 2 == len && relativePath.startsWith("..", pos))
        break;
      // E: move one segment (with its leading "/", if any) to the output.
      int next = relativePath.indexOf('/', pos + 1);
      if (next == -1)
        next = len;
      output.append(relativePath, pos, next);
      pos = next;
    }
  return output.toString();
}

/**
 * Removes the last segment of the path, together with the "/" that
 * precedes it, from the specified buffer. A buffer without any "/" is
 * emptied.
 *
 * @param buffer the buffer containing the path.
 */
private void removeLastSegment(StringBuilder buffer)
{
  int lastSlash = buffer.lastIndexOf("/");
  buffer.setLength(lastSlash == -1 ? 0 : lastSlash);
}

/**
 * Resolves the given URI against this URI.
 *
 * The given URI is returned unchanged when it is absolute or opaque.
 * A reference that consists of a fragment only yields this URI with that
 * fragment. Otherwise, if this URI is opaque, the given URI is returned,
 * since there is no hierarchy to resolve against. In the remaining cases
 * the result takes this URI's scheme; when the reference has no
 * authority it also takes this URI's authority, and a reference path not
 * starting with "/" is merged with this URI's path (RFC 3986, section
 * 5.2.3) and normalized.
 *
 * @param uri The URI to resolve against this URI
 *
 * @return The resulting URI
 *
 * @throws NullPointerException if uri is null
 */
public URI resolve(URI uri)
{
  if (uri.isAbsolute() || uri.isOpaque())
    return uri;

  String refScheme = uri.getScheme();
  String refAuthority = uri.getAuthority();
  String refPath = uri.getPath();
  String refQuery = uri.getQuery();
  String refFragment = uri.getFragment();

  try
    {
      if (refFragment != null && refPath != null && refPath.equals("")
          && refScheme == null && refAuthority == null
          && refQuery == null)
        return new URI(this.scheme, this.schemeSpecificPart, refFragment);

      // An opaque base has no path to merge with.
      if (isOpaque())
        return uri;

      if (refAuthority == null)
        {
          refAuthority = this.authority;
          if (refPath == null)
            refPath = "";
          if (!refPath.startsWith("/"))
            {
              String merged;
              if (this.authority != null && this.path.length() == 0)
                // Base with an authority but an empty path: the merged
                // path must start at the root.
                merged = "/" + refPath;
              else
                // Everything up to and including the last "/" of the
                // base path (nothing at all if it has no "/").
                merged = this.path.substring(0,
                                             this.path.lastIndexOf('/') + 1)
                         + refPath;
              refPath = normalizePath(merged);
            }
        }
      return new URI(this.scheme, refAuthority, refPath, refQuery,
                     refFragment);
    }
  catch (URISyntaxException e)
    {
      throw (Error) new InternalError("Resolved URI variant could not "
                                      + "be constructed").initCause(e);
    }
}

/**
 * Resolves the given URI string against this URI.
 *
 * The string is first converted with {@code create(String)} and the
 * result is passed to {@code resolve(URI)}.
 *
 * @param str The URI as string to resolve against this URI
 *
 * @return The resulting URI
 *
 * @throws IllegalArgumentException If the given URI string
 * cannot be parsed
 * @throws NullPointerException If str is null
 */
public URI resolve(String str) throws IllegalArgumentException
{
  return resolve(create(str));
}

/*
 * NOTE: the comment that stood here opened with "Relativizes the given
 * URI against this URI. The following algorithm is used:", but neither
 * the algorithm description nor the method it documented is present at
 * this point of the file.
 */

/**
 * Compares the URI with the given object for equality. If the
 * object is not a URI, then the method returns false.
 * Otherwise, the following criteria are observed:
 * the schemes and the raw fragments must both be undefined or be equal
 * ignoring case; two opaque URIs must have equal raw scheme-specific
 * parts; two hierarchical URIs must have raw paths and raw queries that
 * are equal ignoring case, and authorities that are either both
 * undefined, or equal ignoring case in their raw form when this URI has
 * no host, or else agree in host (ignoring case), port and raw user
 * information (ignoring case). An opaque URI never equals a
 * hierarchical one.
 *
 * @param obj the object to compare with this URI.
 * @return true if the objects are equal, according to
 * the specification above.
 */
public boolean equals(Object obj)
{
  // Anything that is not a URI (including null) is never equal.
  if (!(obj instanceof URI))
    return false;
  URI uriObj = (URI) obj;

  // Scheme and raw fragment: both undefined, or equal ignoring case.
  if (!equalsIgnoreCaseOrBothNull(scheme, uriObj.getScheme()))
    return false;
  if (!equalsIgnoreCaseOrBothNull(rawFragment, uriObj.getRawFragment()))
    return false;

  // An opaque URI can only equal another opaque URI, in which case the
  // raw scheme-specific parts must match exactly.
  boolean opaqueThis = isOpaque();
  if (opaqueThis != uriObj.isOpaque())
    return false;
  if (opaqueThis)
    return rawSchemeSpecificPart.equals(uriObj.getRawSchemeSpecificPart());

  // Hierarchical URIs: raw path and raw query, ignoring case. The query
  // comparison is null-safe, so an undefined query on this side compared
  // with a defined one on the other yields false instead of failing.
  if (!rawPath.equalsIgnoreCase(uriObj.getRawPath())
      || !equalsIgnoreCaseOrBothNull(rawQuery, uriObj.getRawQuery()))
    return false;

  String otherAuthority = uriObj.getRawAuthority();
  if (rawAuthority == null && otherAuthority == null)
    return true;
  if (host == null)
    // No host on this side: fall back to the raw authorities. Only one
    // of them may be undefined here, in which case the URIs differ.
    return rawAuthority != null
      && rawAuthority.equalsIgnoreCase(otherAuthority);
  return host.equalsIgnoreCase(uriObj.getHost())
    && port == uriObj.getPort()
    && equalsIgnoreCaseOrBothNull(rawUserInfo, uriObj.getRawUserInfo());
}

/**
 * Null-safe, case-insensitive string comparison.
 *
 * @param a the first string, possibly null
 * @param b the second string, possibly null
 * @return true if both strings are null, or if a is non-null and equals
 * b ignoring case
 */
private static boolean equalsIgnoreCaseOrBothNull(String a, String b)
{
  return a == null ? b == null : a.equalsIgnoreCase(b);
}
/**
 * Computes the hashcode of the URI.
 *
 * The hash is built from exactly those components that
 * {@code equals(Object)} compares, and every component that is compared
 * ignoring case is hashed with its characters case-folded, so that two
 * URIs differing only in letter case (for example in the scheme) hash
 * alike, as the equals/hashCode contract requires. The raw
 * scheme-specific part of an opaque URI is compared exactly and is
 * therefore hashed exactly.
 *
 * @return the hash code of this URI
 */
public int hashCode()
{
  int hash = 13 * caseFoldedHash(scheme) + 21 * caseFoldedHash(rawFragment);
  if (isOpaque())
    return hash + 17 * rawSchemeSpecificPart.hashCode();

  hash += 17 * caseFoldedHash(rawPath) + 19 * caseFoldedHash(rawQuery);
  // Mirror equals(): the raw authority decides when there is no host,
  // otherwise host and port do.
  if (host == null)
    hash += 23 * caseFoldedHash(rawAuthority);
  else
    hash += 23 * caseFoldedHash(host) + port;
  return hash;
}

/**
 * Hashes a string so that strings which are equal ignoring case obtain
 * the same value: each character is converted to upper case and then to
 * lower case before it is mixed in.
 *
 * @param s the string to hash, possibly null
 * @return the case-insensitive hash, or 0 for null
 */
private static int caseFoldedHash(String s)
{
  if (s == null)
    return 0;
  int h = 0;
  for (int i = 0; i < s.length(); i++)
    h = 31 * h
        + Character.toLowerCase(Character.toUpperCase(s.charAt(i)));
  return h;
}
/*
 * NOTE: the comment that stood here began with documentation for a
 * comparison method ("Compare the URI with another object that must
 * also be a URI. Undefined components are taken to be less than any
 * other component."); no such method is present at this point of the
 * file.
 */

/**
 * Returns the URI as a string that contains only US-ASCII characters.
 *
 * The result is the same as that of toString() for URIs that don't
 * contain any non-US-ASCII characters. Otherwise, each run of
 * non-US-ASCII characters is replaced by its percent-encoded
 * representation, including a run at the very end of the string.
 *
 * @return a string representation of the URI, containing only US-ASCII
 * characters.
 */
public String toASCIIString()
{
  String strRep = toString();
  final int len = strRep.length();
  StringBuilder buffer = new StringBuilder(len);
  // Index at which the current run of non-ASCII characters began, or -1
  // when not inside such a run. Runs are escaped as a whole so that
  // surrogate pairs are encoded together.
  int runStart = -1;
  for (int i = 0; i < len; i++)
    {
      char c = strRep.charAt(i);
      if (c <= 127)
        {
          if (runStart >= 0)
            {
              buffer.append(escapeCharacters(strRep.substring(runStart, i)));
              runStart = -1;
            }
          buffer.append(c);
        }
      else if (runStart < 0)
        runStart = i;
    }
  // Flush a run that extends to the end of the string; without this the
  // trailing non-ASCII characters would be lost.
  if (runStart >= 0)
    buffer.append(escapeCharacters(strRep.substring(runStart)));
  return buffer.toString();
}
/**
 * Percent-encodes an entire string: the string is converted to UTF-8
 * and every resulting octet is written as "%" followed by two hex
 * digits taken from HEX.
 *
 * @param str a string including non-ASCII characters.
 * @return the string with all of its characters converted to their
 * percent-encoded representations.
 */
private static String escapeCharacters(String str)
{
  byte[] octets;
  try
    {
      octets = str.getBytes("utf-8");
    }
  catch (java.io.UnsupportedEncodingException x)
    {
      throw (Error) new InternalError("Escaping error").initCause(x);
    }

  StringBuilder escaped = new StringBuilder();
  for (int k = 0; k < octets.length; k++)
    {
      int octet = octets[k] & 0xff; // treat the byte as unsigned
      escaped.append('%')
             .append(HEX.charAt(octet / 16))
             .append(HEX.charAt(octet % 16));
    }
  return escaped.toString();
}
}