1 /* URL.java -- Uniform Resource Locator Class
2 Copyright (C) 1998, 1999, 2000, 2002, 2003, 2004
3 Free Software Foundation, Inc.
5 This file is part of GNU Classpath.
7 GNU Classpath is free software; you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 2, or (at your option)
12 GNU Classpath is distributed in the hope that it will be useful, but
13 WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15 General Public License for more details.
17 You should have received a copy of the GNU General Public License
18 along with GNU Classpath; see the file COPYING. If not, write to the
19 Free Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
22 Linking this library statically or dynamically with other modules is
23 making a combined work based on this library. Thus, the terms and
24 conditions of the GNU General Public License cover the whole
27 As a special exception, the copyright holders of this library give you
28 permission to link this library with independent modules to produce an
29 executable, regardless of the license terms of these independent
30 modules, and to copy and distribute the resulting executable under
31 terms of your choice, provided that you also meet, for each linked
32 independent module, the terms and conditions of the license of that
33 module. An independent module is a module which is not derived from
34 or based on this library. If you modify this library, you may extend
35 this exception to your version of the library, but you are not
36 obligated to do so. If you do not wish to do so, delete this
37 exception statement from your version. */
41 import gnu.java.net.URLParseError;
42 import java.io.IOException;
43 import java.io.InputStream;
44 import java.io.ObjectInputStream;
45 import java.io.ObjectOutputStream;
46 import java.io.Serializable;
47 import java.util.HashMap;
48 import java.util.StringTokenizer;
52 * Written using on-line Java Platform 1.2 API Specification, as well
53 * as "The Java Class Libraries", 2nd edition (Addison-Wesley, 1998).
54 * Status: Believed complete and correct.
58 * This final class represents an Internet Uniform Resource Locator (URL).
59 * For details on the syntax of URL's and what they can be used for,
60 * refer to RFC 1738, available from <a
61 * href="http://ds.internic.net/rfcs/rfc1738.txt">
62 * http://ds.internic.net/rfcs/rfc1738.txt</a>
64 * There are a great many protocols supported by URL's such as "http",
65 * "ftp", and "file". This object can handle any arbitrary URL for which
66 * a URLStreamHandler object can be written. Default protocol handlers
67 * are provided for the "http" and "ftp" protocols. Additional protocols
68 * handler implementations may be provided in the future. In any case,
69 * an application or applet can install its own protocol handlers that
70 * can be "chained" with other protocol handlers in the system to extend
71 * the base functionality provided with this class. (Note, however, that
72 * unsigned applets cannot access properties by default or install their
73 * own protocol handlers).
75 * This chaining is done via the system property java.protocol.handler.pkgs
76 * If this property is set, it is assumed to be a "|" separated list of
77 * package names in which to attempt locating protocol handlers. The
78 * protocol handler is searched for by appending the string
79 * ".<protocol>.Handler" to each package in the list until a handler is
80 * found. If a protocol handler is not found in this list of packages, or if
81 * the property does not exist, then the default protocol handler of
82 * "gnu.java.net.protocol.<protocol>.Handler" is tried. If this is
83 * unsuccessful, a MalformedURLException is thrown.
85 * All of the constructor methods of URL attempt to load a protocol
86 * handler and so any needed protocol handlers must be installed when
87 * the URL is constructed.
89 * Here is an example of how URL searches for protocol handlers. Assume
90 * the value of java.protocol.handler.pkgs is "com.foo|com.bar" and the
91 * URL is "news://comp.lang.java.programmer". URL would look in the
92 * following places for protocol handlers:
94 * com.foo.news.Handler
95 * com.bar.news.Handler
96 * gnu.java.net.protocol.news.Handler
98 * If the protocol handler is not found in any of those locations, a
99 * MalformedURLException would be thrown.
101 * Please note that a protocol handler must be a subclass of
104 * Normally, this class caches protocol handlers. Once it finds a handler
105 * for a particular protocol, it never tries to look up a new handler
106 * again. However, if the system property
107 * gnu.java.net.nocache_protocol_handlers is set, then this
108 * caching behavior is disabled. This property is specific to this
109 * implementation. Sun's JDK may or may not do protocol caching, but it
110 * almost certainly does not examine this property.
112 * Please also note that an application can install its own factory for
113 * loading protocol handlers (see setURLStreamHandlerFactory). If this is
114 * done, then the above information is superseded and the behavior of this
115 * class in loading protocol handlers is dependent on that factory.
117 * @author Aaron M. Renn <arenn@urbanophile.com>
118 * @author Warren Levy <warrenl@cygnus.com>
120 * @see URLStreamHandler
122 public final class URL implements Serializable
124 private static final String DEFAULT_SEARCH_PATH =
125 "gnu.java.net.protocol|sun.net.www.protocol";
128 * The name of the protocol for this URL.
129 * The protocol is always stored in lower case.
131 private String protocol;
134 * The "authority" portion of the URL.
136 private String authority;
139 * The hostname or IP address of this protocol.
140 * This includes a possible user. For example <code>joe@some.host.net</code>.
145 * The user information necessary to establish the connection.
147 private String userInfo;
150 * The port number of this protocol or -1 if the port number used is
151 * the default for this protocol.
153 private int port = -1; // Initialize for constructor using context.
156 * The "file" portion of the URL. It is defined as <code>path[?query]</code>.
161 * The anchor portion of the URL.
166 * This is the hashCode for this URL
168 private int hashCode;
171 * The protocol handler in use for this URL
173 transient URLStreamHandler ph;
176 * If an application installs its own protocol handler factory, this is
177 * where we keep track of it.
179 private static URLStreamHandlerFactory factory;
180 private static final long serialVersionUID = -7627629688361524110L;
183 * This is a table where we cache protocol handlers to avoid the overhead
184 * of looking them up each time.
186 private static HashMap ph_cache = new HashMap();
189 * Whether or not to cache protocol handlers.
191 private static boolean cache_handlers;
// Decide once, at class-initialization time, whether protocol handlers are
// cached.  Caching is enabled unless the implementation-specific system
// property gnu.java.net.nocache_protocol_handlers is set (to any value).
static
{
  cache_handlers =
    System.getProperty("gnu.java.net.nocache_protocol_handlers") == null;
}
/**
 * Builds a URL from its individual components and loads a protocol
 * handler for it.  This simply forwards to the five-argument constructor
 * without supplying an explicit stream handler.
 *
 * @param protocol The protocol for this URL ("http", "ftp", etc)
 * @param host The hostname or IP address to connect to
 * @param port The port number to use, or -1 to use the protocol's
 * default port
 * @param file The "file" portion of the URL.
 *
 * @exception MalformedURLException If a protocol handler cannot be loaded or
 * a parse error occurs.
 */
public URL(String protocol, String host, int port, String file)
  throws MalformedURLException
{
  this(protocol, host, port, file, (URLStreamHandler) null);
}
/**
 * Builds a URL from a protocol, host and file, and loads a protocol
 * handler for it.  The port is passed on as -1, meaning the default port
 * for the protocol; no explicit stream handler is supplied.
 *
 * @param protocol The protocol for this URL ("http", "ftp", etc)
 * @param host The hostname or IP address for this URL
 * @param file The "file" portion of this URL.
 *
 * @exception MalformedURLException If a protocol handler cannot be loaded or
 * a parse error occurs.
 */
public URL(String protocol, String host, String file)
  throws MalformedURLException
{
  this(protocol, host, -1, file, (URLStreamHandler) null);
}
240 * This method initializes a new instance of <code>URL</code> with the
241 * specified protocol, host, port, and file. Additionally, this method
242 * allows the caller to specify a protocol handler to use instead of
243 * the default. If this handler is specified, the caller must have
244 * the "specifyStreamHandler" permission (see <code>NetPermission</code>)
245 * or a <code>SecurityException</code> will be thrown.
247 * @param protocol The protocol for this URL ("http", "ftp", etc)
248 * @param host The hostname or IP address to connect to
249 * @param port The port number to use, or -1 to use the protocol's default
251 * @param file The "file" portion of the URL.
252 * @param ph The protocol handler to use with this URL.
254 * @exception MalformedURLException If no protocol handler can be loaded
255 * for the specified protocol.
256 * @exception SecurityException If the <code>SecurityManager</code> exists
257 * and does not allow the caller to specify its own protocol handler.
261 public URL(String protocol, String host, int port, String file,
262 URLStreamHandler ph) throws MalformedURLException
264 if (protocol == null)
265 throw new MalformedURLException("null protocol");
266 protocol = protocol.toLowerCase();
267 this.protocol = protocol;
271 SecurityManager s = System.getSecurityManager();
273 s.checkPermission(new NetPermission("specifyStreamHandler"));
278 this.ph = getURLStreamHandler(protocol);
281 throw new MalformedURLException("Protocol handler not found: "
286 this.authority = (host != null) ? host : "";
288 this.authority += ":" + port;
290 int hashAt = file.indexOf('#');
298 this.file = file.substring(0, hashAt);
299 this.ref = file.substring(hashAt + 1);
301 hashCode = hashCode(); // Used for serialization.
/**
 * Creates a URL from a complete string specification such as
 * "http://www.urbanophile.com/arenn/".  The work is done by the
 * (context, spec, handler) constructor, called here with neither a
 * context URL nor an explicit stream handler.
 *
 * @param spec The complete String representation of a URL
 *
 * @exception MalformedURLException If a protocol handler cannot be found
 * or the URL cannot be parsed
 */
public URL(String spec) throws MalformedURLException
{
  // The casts are required: a bare null would not select a unique
  // three-argument constructor.
  this((URL) null, spec, (URLStreamHandler) null);
}
/**
 * Parses a String representation of a URL within the context of an
 * existing URL.  Fields that are not present in the spec are inherited
 * from the context URL, which allows relative URLs to be constructed
 * easily.  If the context argument is null, a complete URL must be given
 * in the spec.  If the protocol parsed out of the spec differs from the
 * context URL's protocol, the spec is likewise expected to be a complete
 * URL.  No explicit stream handler is supplied.
 *
 * @param context The context on which to parse the specification
 * @param spec The string to parse as a URL
 *
 * @exception MalformedURLException If a protocol handler cannot be found
 * or the URL cannot be parsed
 */
public URL(URL context, String spec) throws MalformedURLException
{
  this(context, spec, (URLStreamHandler) null);
}
343 * Creates an URL from given arguments
344 * This method parses a String representation of a URL within the
345 * context of an existing URL. Principally this means that any fields
346 * not present in the URL are inherited from the context URL. This allows
347 * relative URL's to be easily constructed. If the context argument is
348 * null, then a complete URL must be specified in the URL string.
349 * If the protocol parsed out of the URL is different
350 * from the context URL's protocol, then the URL String is also
351 * expected to be a complete URL.
353 * Additionally, this method allows the caller to specify a protocol handler
354 * to use instead of the default. If this handler is specified, the caller
355 * must have the "specifyStreamHandler" permission
356 * (see <code>NetPermission</code>) or a <code>SecurityException</code>
359 * @param context The context in which to parse the specification
360 * @param spec The string to parse as an URL
361 * @param ph The stream handler for the URL
363 * @exception MalformedURLException If a protocol handler cannot be found
364 * or the URL cannot be parsed
365 * @exception SecurityException If the <code>SecurityManager</code> exists
366 * and does not allow the caller to specify its own protocol handler.
370 public URL(URL context, String spec, URLStreamHandler ph)
371 throws MalformedURLException
373 /* A protocol is defined by the doc as the substring before a ':'
374 * as long as the ':' occurs before any '/'.
376 * If context is null, then spec must be an absolute URL.
378 * The relative URL need not specify all the components of a URL.
379 * If the protocol, host name, or port number is missing, the value
380 * is inherited from the context. A bare file component is appended
381 * to the context's file. The optional anchor is not inherited.
384 // If this is an absolute URL, then ignore context completely.
385 // An absolute URL must have chars prior to "://" but cannot have a colon
386 // right after the "://". The second colon is for an optional port value
387 // and implies that the host from the context is used if available.
389 if ((colon = spec.indexOf("://", 1)) > 0
390 && ! spec.regionMatches(colon, "://:", 0, 4))
394 if ((colon = spec.indexOf(':')) > 0
395 && (colon < (slash = spec.indexOf('/')) || slash < 0))
397 // Protocol specified in spec string.
398 protocol = spec.substring(0, colon).toLowerCase();
399 if (context != null && context.protocol.equals(protocol))
401 // The 1.2 doc specifically says these are copied to the new URL.
405 userInfo = context.userInfo;
406 if (file == null || file.length() == 0)
408 authority = context.authority;
411 else if (context != null)
413 // Protocol NOT specified in spec string.
414 // Use context fields (except ref) as a foundation for relative URLs.
416 protocol = context.protocol;
420 userInfo = context.userInfo;
421 if (file == null || file.length() == 0)
423 authority = context.authority;
425 else // Protocol NOT specified in spec. and no context available.
428 throw new MalformedURLException("Absolute URL required with null context");
432 SecurityManager s = System.getSecurityManager();
434 s.checkPermission(new NetPermission("specifyStreamHandler"));
439 this.ph = getURLStreamHandler(protocol);
442 throw new MalformedURLException("Protocol handler not found: "
445 // JDK 1.2 doc for parseURL specifically states that any '#' ref
446 // is to be excluded by passing the 'limit' as the indexOf the '#'
447 // if one exists, otherwise pass the end of the string.
448 int hashAt = spec.indexOf('#', colon + 1);
452 this.ph.parseURL(this, spec, colon + 1,
453 hashAt < 0 ? spec.length() : hashAt);
455 catch (URLParseError e)
457 throw new MalformedURLException(e.getMessage());
461 ref = spec.substring(hashAt + 1);
463 hashCode = hashCode(); // Used for serialization.
467 * Test another URL for equality with this one. This will be true only if
468 * the argument is non-null and all of the fields in the URL's match
469 * exactly (ie, protocol, host, port, file, and ref). Overrides
470 * Object.equals(), implemented by calling the equals method of the handler.
472 * @param obj The URL to compare with
474 * @return true if the URL is equal, false otherwise
476 public boolean equals(Object obj)
478 if (! (obj instanceof URL))
481 return ph.equals(this, (URL) obj);
485 * Returns the contents of this URL as an object by first opening a
486 * connection, then calling the getContent() method against the connection
488 * @return A content object for this URL
489 * @exception IOException If opening the connection or getting the
494 public Object getContent() throws IOException
496 return openConnection().getContent();
500 * Gets the contents of this URL
502 * @param classes The allow classes for the content object.
504 * @return a context object for this URL.
506 * @exception IOException If an error occurs
508 public Object getContent(Class[] classes) throws IOException
510 // FIXME: implement this
515 * Returns the file portion of the URL.
516 * Defined as <code>path[?query]</code>.
517 * Returns the empty string if there is no file portion.
519 * @return The filename specified in this URL, or an empty string if empty.
521 public String getFile()
523 return file == null ? "" : file;
527 * Returns the path of the URL. This is the part of the file before any '?'
530 * @return The path specified in this URL, or null if empty.
534 public String getPath()
538 int quest = file.indexOf('?');
539 return quest < 0 ? getFile() : file.substring(0, quest);
543 * Returns the authority of the URL
545 * @return The authority specified in this URL.
549 public String getAuthority()
555 * Returns the host of the URL
557 * @return The host specified in this URL.
559 public String getHost()
561 int at = (host == null) ? -1 : host.indexOf('@');
562 return at < 0 ? host : host.substring(at + 1, host.length());
566 * Returns the port number of this URL or -1 if the default port number is
569 * @return The port number
571 * @see #getDefaultPort()
579 * Returns the default port of the URL. If the StreamHandler for the URL
580 * protocol does not define a default port it returns -1.
582 * @return The default port of the current protocol.
584 public int getDefaultPort()
586 return ph.getDefaultPort();
590 * Returns the protocol of the URL
592 * @return The specified protocol.
594 public String getProtocol()
600 * Returns the ref (sometimes called the "# reference" or "anchor") portion
605 public String getRef()
611 * Returns the user information of the URL. This is the part of the host
612 * name before the '@'.
614 * @return the user at a particular host or null when no user defined.
616 public String getUserInfo()
618 if (userInfo != null)
620 int at = (host == null) ? -1 : host.indexOf('@');
621 return at < 0 ? null : host.substring(0, at);
625 * Returns the query of the URL. This is the part of the file before the
628 * @return the query part of the file, or null when there is no query part.
630 public String getQuery()
632 int quest = (file == null) ? -1 : file.indexOf('?');
633 return quest < 0 ? null : file.substring(quest + 1, file.length());
637 * Returns a hashcode computed by the URLStreamHandler of this URL
639 * @return The hashcode for this URL.
641 public int hashCode()
644 return hashCode; // Use cached value if available.
646 return ph.hashCode(this);
650 * Returns a URLConnection object that represents a connection to the remote
651 * object referred to by the URL. The URLConnection is created by calling the
652 * openConnection() method of the protocol handler
654 * @return A URLConnection for this URL
656 * @exception IOException If an error occurs
658 public URLConnection openConnection() throws IOException
660 return ph.openConnection(this);
664 * Opens a connection to this URL and returns an InputStream for reading
665 * from that connection
667 * @return An <code>InputStream</code> for this URL.
669 * @exception IOException If an error occurs
671 public InputStream openStream() throws IOException
673 return openConnection().getInputStream();
677 * Tests whether or not another URL refers to the same "file" as this one.
678 * This will be true if and only if the passed object is not null, is a
679 * URL, and matches all fields but the ref (ie, protocol, host, port,
682 * @param url The URL object to test with
684 * @return true if URL matches this URL's file, false otherwise
686 public boolean sameFile(URL url)
688 return ph.sameFile(this, url);
692 * Sets the specified fields of the URL. This is not a public method so
693 * that only URLStreamHandlers can modify URL fields. This might be called
694 * by the <code>parseURL()</code> method in that class. URLs are otherwise
697 * @param protocol The protocol name for this URL
698 * @param host The hostname or IP address for this URL
699 * @param port The port number of this URL
700 * @param file The "file" portion of this URL.
701 * @param ref The anchor portion of this URL.
703 protected void set(String protocol, String host, int port, String file,
706 // TBD: Theoretically, a poorly written StreamHandler could pass an
707 // invalid protocol. It will cause the handler to be set to null
708 // thus overriding a valid handler. Callers of this method should
710 protocol = protocol.toLowerCase ();
711 if (! this.protocol.equals (protocol))
713 this.ph = getURLStreamHandler(protocol);
714 this.protocol = protocol;
723 this.authority += host;
725 this.authority += ":" + port;
727 hashCode = hashCode(); // Used for serialization.
731 * Sets the specified fields of the URL. This is not a public method so
732 * that only URLStreamHandlers can modify URL fields. URLs are otherwise
735 * @param protocol The protocol name for this URL.
736 * @param host The hostname or IP address for this URL.
737 * @param port The port number of this URL.
738 * @param authority The authority of this URL.
739 * @param userInfo The user and password (if needed) of this URL.
740 * @param path The "path" portion of this URL.
741 * @param query The query of this URL.
742 * @param ref The anchor portion of this URL.
746 protected void set(String protocol, String host, int port, String authority,
747 String userInfo, String path, String query, String ref)
749 // TBD: Theoretically, a poorly written StreamHandler could pass an
750 // invalid protocol. It will cause the handler to be set to null
751 // thus overriding a valid handler. Callers of this method should
753 protocol = protocol.toLowerCase ();
754 if (! this.protocol.equals (protocol))
756 this.ph = getURLStreamHandler(protocol);
757 this.protocol = protocol;
760 this.userInfo = userInfo;
762 this.authority = authority;
766 this.file = path + "?" + query;
768 hashCode = hashCode(); // Used for serialization.
772 * Sets the URLStreamHandlerFactory for this class. This factory is
773 * responsible for returning the appropriate protocol handler for
776 * @param fac The URLStreamHandlerFactory class to use
778 * @exception Error If the factory is alread set.
779 * @exception SecurityException If a security manager exists and its
780 * checkSetFactory method doesn't allow the operation
782 public static synchronized void setURLStreamHandlerFactory(URLStreamHandlerFactory fac)
785 throw new Error("URLStreamHandlerFactory already set");
787 // Throw an exception if an extant security mgr precludes
788 // setting the factory.
789 SecurityManager s = System.getSecurityManager();
796 * Returns a String representing this URL. The String returned is
797 * created by calling the protocol handler's toExternalForm() method.
799 * @return A string for this URL
801 public String toExternalForm()
803 // Identical to toString().
804 return ph.toExternalForm(this);
808 * Returns a String representing this URL. Identical to toExternalForm().
809 * The value returned is created by the protocol handler's
810 * toExternalForm method. Overrides Object.toString()
812 * @return A string for this URL
814 public String toString()
816 // Identical to toExternalForm().
817 return ph.toExternalForm(this);
821 * This internal method is used in two different constructors to load
822 * a protocol handler for this URL.
824 * @param protocol The protocol to load a handler for
826 * @return A URLStreamHandler for this protocol, or null when not found.
828 private static synchronized URLStreamHandler getURLStreamHandler(String protocol)
830 URLStreamHandler ph = null;
832 // First, see if a protocol handler is in our cache.
835 if ((ph = (URLStreamHandler) ph_cache.get(protocol)) != null)
839 // If a non-default factory has been set, use it to find the protocol.
842 ph = factory.createURLStreamHandler(protocol);
844 else if (protocol.equals("core"))
846 ph = new gnu.java.net.protocol.core.Handler();
848 else if (protocol.equals("file"))
850 // This is an interesting case. It's tempting to think that we
851 // could call Class.forName ("gnu.java.net.protocol.file.Handler") to
852 // get the appropriate class. Unfortunately, if we do that the
853 // program will never terminate, because getURLStreamHandler is
854 // eventually called by Class.forName.
856 // Treating "file" as a special case is the minimum that will
857 // fix this problem. If other protocols are required in a
858 // statically linked application they will need to be handled in
859 // the same way as "file".
860 ph = new gnu.java.net.protocol.file.Handler();
863 // Non-default factory may have returned null or a factory wasn't set.
864 // Use the default search algorithm to find a handler for this protocol.
867 // Get the list of packages to check and append our default handler
868 // to it, along with the JDK specified default as a last resort.
869 // Except in very unusual environments the JDK specified one shouldn't
870 // ever be needed (or available).
871 String ph_search_path =
872 System.getProperty("java.protocol.handler.pkgs");
874 // Tack our default package on at the ends.
875 if (ph_search_path != null)
876 ph_search_path += "|" + DEFAULT_SEARCH_PATH;
878 ph_search_path = DEFAULT_SEARCH_PATH;
880 // Finally loop through our search path looking for a match.
881 StringTokenizer pkgPrefix = new StringTokenizer(ph_search_path, "|");
886 (pkgPrefix.nextToken() + "." + protocol + ".Handler");
890 Object obj = Class.forName(clsName).newInstance();
892 if (! (obj instanceof URLStreamHandler))
895 ph = (URLStreamHandler) obj;
899 // Can't instantiate; handler still null,
900 // go on to next element.
903 while ((! (ph instanceof URLStreamHandler))
904 && pkgPrefix.hasMoreTokens());
907 // Update the hashtable with the new protocol handler.
908 if (ph != null && cache_handlers)
909 if (ph instanceof URLStreamHandler)
910 ph_cache.put(protocol, ph);
917 private void readObject(ObjectInputStream ois)
918 throws IOException, ClassNotFoundException
920 ois.defaultReadObject();
921 this.ph = getURLStreamHandler(protocol);
923 throw new IOException("Handler for protocol " + protocol + " not found");
926 private void writeObject(ObjectOutputStream oos) throws IOException
928 oos.defaultWriteObject();