1 /* URL.java -- Uniform Resource Locator Class
2 Copyright (C) 1998, 1999, 2000, 2002, 2003, 2004
3 Free Software Foundation, Inc.
5 This file is part of GNU Classpath.
7 GNU Classpath is free software; you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 2, or (at your option)
12 GNU Classpath is distributed in the hope that it will be useful, but
13 WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15 General Public License for more details.
17 You should have received a copy of the GNU General Public License
18 along with GNU Classpath; see the file COPYING. If not, write to the
19 Free Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
22 Linking this library statically or dynamically with other modules is
23 making a combined work based on this library. Thus, the terms and
24 conditions of the GNU General Public License cover the whole
27 As a special exception, the copyright holders of this library give you
28 permission to link this library with independent modules to produce an
29 executable, regardless of the license terms of these independent
30 modules, and to copy and distribute the resulting executable under
31 terms of your choice, provided that you also meet, for each linked
32 independent module, the terms and conditions of the license of that
33 module. An independent module is a module which is not derived from
34 or based on this library. If you modify this library, you may extend
35 this exception to your version of the library, but you are not
36 obligated to do so. If you do not wish to do so, delete this
37 exception statement from your version. */
41 import gnu.java.net.URLParseError;
42 import java.io.IOException;
43 import java.io.InputStream;
44 import java.io.ObjectInputStream;
45 import java.io.ObjectOutputStream;
46 import java.io.Serializable;
47 import java.util.HashMap;
48 import java.util.StringTokenizer;
52 * Written using on-line Java Platform 1.2 API Specification, as well
53 * as "The Java Class Libraries", 2nd edition (Addison-Wesley, 1998).
54 * Status: Believed complete and correct.
58 * This final class represents an Internet Uniform Resource Locator (URL).
59 * For details on the syntax of URL's and what they can be used for,
60 * refer to RFC 1738, available from <a
61 * href="http://ds.internic.net/rfcs/rfc1738.txt">
62 * http://ds.internic.net/rfcs/rfc1738.txt</a>
64 * There are a great many protocols supported by URL's such as "http",
65 * "ftp", and "file". This object can handle any arbitrary URL for which
66 * a URLStreamHandler object can be written. Default protocol handlers
67 * are provided for the "http" and "ftp" protocols. Additional protocols
68 * handler implementations may be provided in the future. In any case,
69 * an application or applet can install its own protocol handlers that
70 * can be "chained" with other protocol handlers in the system to extend
71 * the base functionality provided with this class. (Note, however, that
72 * unsigned applets cannot access properties by default or install their
73 * own protocol handlers).
75 * This chaining is done via the system property java.protocol.handler.pkgs.
76 * If this property is set, it is assumed to be a "|" separated list of
77 * package names in which to attempt locating protocol handlers. The
78 * protocol handler is searched for by appending the string
79 * ".<protocol>.Handler" to each package in the list until a handler is
80 * found. If a protocol handler is not found in this list of packages, or if
81 * the property does not exist, then the default protocol handler of
82 * "gnu.java.net.<protocol>.Handler" is tried. If this is
83 * unsuccessful, a MalformedURLException is thrown.
85 * All of the constructor methods of URL attempt to load a protocol
86 * handler and so any needed protocol handlers must be installed when
87 * the URL is constructed.
89 * Here is an example of how URL searches for protocol handlers. Assume
90 * the value of java.protocol.handler.pkgs is "com.foo|com.bar" and the
91 * URL is "news://comp.lang.java.programmer". URL would look in the
92 * following places for protocol handlers:
94 * com.foo.news.Handler
95 * com.bar.news.Handler
96 * gnu.java.net.news.Handler
98 * If the protocol handler is not found in any of those locations, a
99 * MalformedURLException would be thrown.
101 * Please note that a protocol handler must be a subclass of
104 * Normally, this class caches protocol handlers. Once it finds a handler
105 * for a particular protocol, it never tries to look up a new handler
106 * again. However, if the system property
107 * gnu.java.net.nocache_protocol_handlers is set, then this
108 * caching behavior is disabled. This property is specific to this
109 * implementation. Sun's JDK may or may not do protocol caching, but it
110 * almost certainly does not examine this property.
112 * Please also note that an application can install its own factory for
113 * loading protocol handlers (see setURLStreamHandlerFactory). If this is
114 * done, then the above information is superseded and the behavior of this
115 * class in loading protocol handlers is dependent on that factory.
117 * @author Aaron M. Renn <arenn@urbanophile.com>
118 * @author Warren Levy <warrenl@cygnus.com>
120 * @see URLStreamHandler
122 public final class URL implements Serializable
124 private static final String DEFAULT_SEARCH_PATH =
125 "gnu.java.net.protocol|gnu.inet";
128 * The name of the protocol for this URL.
129 * The protocol is always stored in lower case.
131 private String protocol;
134 * The "authority" portion of the URL.
136 private String authority;
139 * The hostname or IP address of this protocol.
140 * This includes a possible user. For example <code>joe@some.host.net</code>.
145 * The user information necessary to establish the connection.
147 private String userInfo;
150 * The port number of this protocol or -1 if the port number used is
151 * the default for this protocol.
153 private int port = -1; // Initialize for constructor using context.
156 * The "file" portion of the URL. It is defined as <code>path[?query]</code>.
161 * The anchor portion of the URL.
166 * This is the hashCode for this URL
168 private int hashCode;
171 * The protocol handler in use for this URL
173 transient URLStreamHandler ph;
176 * If an application installs its own protocol handler factory, this is
177 * where we keep track of it.
179 private static URLStreamHandlerFactory factory;
180 private static final long serialVersionUID = -7627629688361524110L;
183 * This is a table where we cache protocol handlers to avoid the overhead
184 * of looking them up each time.
186 private static HashMap ph_cache = new HashMap();
189 * Whether or not to cache protocol handlers.
191 private static boolean cache_handlers;
195 String s = System.getProperty("gnu.java.net.nocache_protocol_handlers");
198 cache_handlers = true;
200 cache_handlers = false;
/**
 * Builds a URL from its individual components and loads a protocol
 * handler for it.  This simply forwards to the five-argument constructor
 * without supplying an explicit stream handler.
 *
 * @param protocol The protocol for this URL ("http", "ftp", etc)
 * @param host The hostname or IP address to connect to
 * @param port The port number to use, or -1 to use the protocol's
 * default port
 * @param file The "file" portion of the URL.
 *
 * @exception MalformedURLException If a protocol handler cannot be loaded or
 * a parse error occurs.
 */
public URL(String protocol, String host, int port, String file)
  throws MalformedURLException
{
  // The cast mirrors the other delegating constructors of this class.
  this(protocol, host, port, file, (URLStreamHandler) null);
}
/**
 * Builds a URL from a protocol, host and file, and loads a protocol
 * handler for it.  The port is passed on as -1, meaning the default port
 * for the protocol, and no explicit stream handler is supplied.
 *
 * @param protocol The protocol for this URL ("http", "ftp", etc)
 * @param host The hostname or IP address for this URL
 * @param file The "file" portion of this URL.
 *
 * @exception MalformedURLException If a protocol handler cannot be loaded or
 * a parse error occurs.
 */
public URL(String protocol, String host, String file)
  throws MalformedURLException
{
  // -1 selects the protocol's default port.
  this(protocol, host, -1, file, (URLStreamHandler) null);
}
240 * This method initializes a new instance of <code>URL</code> with the
241 * specified protocol, host, port, and file. Additionally, this method
242 * allows the caller to specify a protocol handler to use instead of
243 * the default. If this handler is specified, the caller must have
244 * the "specifyStreamHandler" permission (see <code>NetPermission</code>)
245 * or a <code>SecurityException</code> will be thrown.
247 * @param protocol The protocol for this URL ("http", "ftp", etc)
248 * @param host The hostname or IP address to connect to
249 * @param port The port number to use, or -1 to use the protocol's default
251 * @param file The "file" portion of the URL.
252 * @param ph The protocol handler to use with this URL.
254 * @exception MalformedURLException If no protocol handler can be loaded
255 * for the specified protocol.
256 * @exception SecurityException If the <code>SecurityManager</code> exists
257 * and does not allow the caller to specify its own protocol handler.
261 public URL(String protocol, String host, int port, String file,
262 URLStreamHandler ph) throws MalformedURLException
264 if (protocol == null)
265 throw new MalformedURLException("null protocol");
266 protocol = protocol.toLowerCase();
267 this.protocol = protocol;
271 SecurityManager s = System.getSecurityManager();
273 s.checkPermission(new NetPermission("specifyStreamHandler"));
278 this.ph = getURLStreamHandler(protocol);
281 throw new MalformedURLException("Protocol handler not found: "
286 this.authority = (host != null) ? host : "";
287 if (port >= 0 && host != null)
288 this.authority += ":" + port;
290 int hashAt = file.indexOf('#');
298 this.file = file.substring(0, hashAt);
299 this.ref = file.substring(hashAt + 1);
301 hashCode = hashCode(); // Used for serialization.
/**
 * Creates a URL from a complete string specification such as
 * "http://www.urbanophile.com/arenn/".  The work is done by the
 * (context, spec, handler) constructor, called here with neither a
 * context URL nor an explicit stream handler.
 *
 * @param spec The complete String representation of a URL
 *
 * @exception MalformedURLException If a protocol handler cannot be found
 * or the URL cannot be parsed
 */
public URL(String spec) throws MalformedURLException
{
  // Both casts are kept so that overload resolution is unambiguous.
  this((URL) null, spec, (URLStreamHandler) null);
}
/**
 * Parses a String representation of a URL within the context of an
 * existing URL.  Principally this means that any fields not present in
 * the URL string are inherited from the context URL, which allows
 * relative URLs to be constructed easily.  If the context argument is
 * null, then a complete URL must be specified in the URL string.  If the
 * protocol parsed out of the URL string is different from the context
 * URL's protocol, then the URL string is also expected to be a complete
 * URL.
 *
 * @param context The context in which to parse the specification
 * @param spec The string to parse as a URL
 *
 * @exception MalformedURLException If a protocol handler cannot be found
 * or the URL cannot be parsed
 */
public URL(URL context, String spec) throws MalformedURLException
{
  // No explicit stream handler is supplied.
  this(context, spec, (URLStreamHandler) null);
}
343 * Creates an URL from given arguments
344 * This method parses a String representation of a URL within the
345 * context of an existing URL. Principally this means that any fields
346 * not present in the URL are inherited from the context URL. This allows
347 * relative URL's to be easily constructed. If the context argument is
348 * null, then a complete URL must be specified in the URL string.
349 * If the protocol parsed out of the URL is different
350 * from the context URL's protocol, then the URL String is also
351 * expected to be a complete URL.
353 * Additionally, this method allows the caller to specify a protocol handler
354 * to use instead of the default. If this handler is specified, the caller
355 * must have the "specifyStreamHandler" permission
356 * (see <code>NetPermission</code>) or a <code>SecurityException</code>
359 * @param context The context in which to parse the specification
360 * @param spec The string to parse as an URL
361 * @param ph The stream handler for the URL
363 * @exception MalformedURLException If a protocol handler cannot be found
364 * or the URL cannot be parsed
365 * @exception SecurityException If the <code>SecurityManager</code> exists
366 * and does not allow the caller to specify its own protocol handler.
370 public URL(URL context, String spec, URLStreamHandler ph)
371 throws MalformedURLException
373 /* A protocol is defined by the doc as the substring before a ':'
374 * as long as the ':' occurs before any '/'.
376 * If context is null, then spec must be an absolute URL.
378 * The relative URL need not specify all the components of a URL.
379 * If the protocol, host name, or port number is missing, the value
380 * is inherited from the context. A bare file component is appended
381 * to the context's file. The optional anchor is not inherited.
384 // If this is an absolute URL, then ignore context completely.
385 // An absolute URL must have chars prior to "://" but cannot have a colon
386 // right after the "://". The second colon is for an optional port value
387 // and implies that the host from the context is used if available.
389 if ((colon = spec.indexOf("://", 1)) > 0
390 && ! spec.regionMatches(colon, "://:", 0, 4))
394 if ((colon = spec.indexOf(':')) > 0
395 && (colon < (slash = spec.indexOf('/')) || slash < 0))
397 // Protocol specified in spec string.
398 protocol = spec.substring(0, colon).toLowerCase();
399 if (context != null && context.protocol.equals(protocol))
401 // The 1.2 doc specifically says these are copied to the new URL.
405 userInfo = context.userInfo;
406 if (file == null || file.length() == 0)
408 authority = context.authority;
411 else if (context != null)
413 // Protocol NOT specified in spec string.
414 // Use context fields (except ref) as a foundation for relative URLs.
416 protocol = context.protocol;
420 userInfo = context.userInfo;
421 if (file == null || file.length() == 0)
423 authority = context.authority;
425 else // Protocol NOT specified in spec. and no context available.
428 throw new MalformedURLException("Absolute URL required with null context");
430 protocol = protocol.trim();
434 SecurityManager s = System.getSecurityManager();
436 s.checkPermission(new NetPermission("specifyStreamHandler"));
441 this.ph = getURLStreamHandler(protocol);
444 throw new MalformedURLException("Protocol handler not found: "
447 // JDK 1.2 doc for parseURL specifically states that any '#' ref
448 // is to be excluded by passing the 'limit' as the indexOf the '#'
449 // if one exists, otherwise pass the end of the string.
450 int hashAt = spec.indexOf('#', colon + 1);
454 this.ph.parseURL(this, spec, colon + 1,
455 hashAt < 0 ? spec.length() : hashAt);
457 catch (URLParseError e)
459 throw new MalformedURLException(e.getMessage());
463 ref = spec.substring(hashAt + 1);
465 hashCode = hashCode(); // Used for serialization.
469 * Test another URL for equality with this one. This will be true only if
470 * the argument is non-null and all of the fields in the URL's match
471 * exactly (ie, protocol, host, port, file, and ref). Overrides
472 * Object.equals(), implemented by calling the equals method of the handler.
474 * @param obj The URL to compare with
476 * @return true if the URL is equal, false otherwise
478 public boolean equals(Object obj)
480 if (! (obj instanceof URL))
483 return ph.equals(this, (URL) obj);
487 * Returns the contents of this URL as an object by first opening a
488 * connection, then calling the getContent() method against the connection
490 * @return A content object for this URL
491 * @exception IOException If opening the connection or getting the
496 public Object getContent() throws IOException
498 return openConnection().getContent();
502 * Gets the contents of this URL
504 * @param classes The allow classes for the content object.
506 * @return a context object for this URL.
508 * @exception IOException If an error occurs
510 public Object getContent(Class[] classes) throws IOException
512 // FIXME: implement this
517 * Returns the file portion of the URL.
518 * Defined as <code>path[?query]</code>.
519 * Returns the empty string if there is no file portion.
521 * @return The filename specified in this URL, or an empty string if empty.
523 public String getFile()
525 return file == null ? "" : file;
529 * Returns the path of the URL. This is the part of the file before any '?'
532 * @return The path specified in this URL, or null if empty.
536 public String getPath()
538 // The spec says we need to return an empty string, but some
539 // applications depends on receiving null when the path is empty.
542 int quest = file.indexOf('?');
543 return quest < 0 ? getFile() : file.substring(0, quest);
547 * Returns the authority of the URL
549 * @return The authority specified in this URL.
553 public String getAuthority()
559 * Returns the host of the URL
561 * @return The host specified in this URL.
563 public String getHost()
565 int at = (host == null) ? -1 : host.indexOf('@');
566 return at < 0 ? host : host.substring(at + 1, host.length());
570 * Returns the port number of this URL or -1 if the default port number is
573 * @return The port number
575 * @see #getDefaultPort()
583 * Returns the default port of the URL. If the StreamHandler for the URL
584 * protocol does not define a default port it returns -1.
586 * @return The default port of the current protocol.
588 public int getDefaultPort()
590 return ph.getDefaultPort();
594 * Returns the protocol of the URL
596 * @return The specified protocol.
598 public String getProtocol()
604 * Returns the ref (sometimes called the "# reference" or "anchor") portion
609 public String getRef()
615 * Returns the user information of the URL. This is the part of the host
616 * name before the '@'.
618 * @return the user at a particular host or null when no user defined.
620 public String getUserInfo()
622 if (userInfo != null)
624 int at = (host == null) ? -1 : host.indexOf('@');
625 return at < 0 ? null : host.substring(0, at);
629 * Returns the query of the URL. This is the part of the file before the
632 * @return the query part of the file, or null when there is no query part.
634 public String getQuery()
636 int quest = (file == null) ? -1 : file.indexOf('?');
637 return quest < 0 ? null : file.substring(quest + 1, file.length());
641 * Returns a hashcode computed by the URLStreamHandler of this URL
643 * @return The hashcode for this URL.
645 public int hashCode()
648 return hashCode; // Use cached value if available.
650 return ph.hashCode(this);
654 * Returns a URLConnection object that represents a connection to the remote
655 * object referred to by the URL. The URLConnection is created by calling the
656 * openConnection() method of the protocol handler
658 * @return A URLConnection for this URL
660 * @exception IOException If an error occurs
662 public URLConnection openConnection() throws IOException
664 return ph.openConnection(this);
668 * Opens a connection to this URL and returns an InputStream for reading
669 * from that connection
671 * @return An <code>InputStream</code> for this URL.
673 * @exception IOException If an error occurs
675 public InputStream openStream() throws IOException
677 return openConnection().getInputStream();
681 * Tests whether or not another URL refers to the same "file" as this one.
682 * This will be true if and only if the passed object is not null, is a
683 * URL, and matches all fields but the ref (ie, protocol, host, port,
686 * @param url The URL object to test with
688 * @return true if URL matches this URL's file, false otherwise
690 public boolean sameFile(URL url)
692 return ph.sameFile(this, url);
696 * Sets the specified fields of the URL. This is not a public method so
697 * that only URLStreamHandlers can modify URL fields. This might be called
698 * by the <code>parseURL()</code> method in that class. URLs are otherwise
699 * constant. If the given protocol does not exist, it will keep the previously
702 * @param protocol The protocol name for this URL
703 * @param host The hostname or IP address for this URL
704 * @param port The port number of this URL
705 * @param file The "file" portion of this URL.
706 * @param ref The anchor portion of this URL.
708 protected void set(String protocol, String host, int port, String file,
711 URLStreamHandler protocolHandler = null;
712 protocol = protocol.toLowerCase();
713 if (! this.protocol.equals(protocol))
714 protocolHandler = getURLStreamHandler(protocol);
716 // It is a hidden feature of the JDK. If the protocol does not exist,
717 // we keep the previously initialized protocol.
718 if (protocolHandler != null)
720 this.ph = protocolHandler;
721 this.protocol = protocol;
730 this.authority += host;
732 this.authority += ":" + port;
734 hashCode = hashCode(); // Used for serialization.
738 * Sets the specified fields of the URL. This is not a public method so
739 * that only URLStreamHandlers can modify URL fields. URLs are otherwise
740 * constant. If the given protocol does not exist, it will keep the previously
743 * @param protocol The protocol name for this URL.
744 * @param host The hostname or IP address for this URL.
745 * @param port The port number of this URL.
746 * @param authority The authority of this URL.
747 * @param userInfo The user and password (if needed) of this URL.
748 * @param path The "path" portion of this URL.
749 * @param query The query of this URL.
750 * @param ref The anchor portion of this URL.
754 protected void set(String protocol, String host, int port, String authority,
755 String userInfo, String path, String query, String ref)
757 URLStreamHandler protocolHandler = null;
758 protocol = protocol.toLowerCase();
759 if (! this.protocol.equals(protocol))
760 protocolHandler = getURLStreamHandler(protocol);
762 // It is a hidden feature of the JDK. If the protocol does not exist,
763 // we keep the previously initialized protocol.
764 if (protocolHandler != null)
766 this.ph = protocolHandler;
767 this.protocol = protocol;
770 this.userInfo = userInfo;
772 this.authority = authority;
776 this.file = path + "?" + query;
778 hashCode = hashCode(); // Used for serialization.
782 * Sets the URLStreamHandlerFactory for this class. This factory is
783 * responsible for returning the appropriate protocol handler for
786 * @param fac The URLStreamHandlerFactory class to use
788 * @exception Error If the factory is already set.
789 * @exception SecurityException If a security manager exists and its
790 * checkSetFactory method doesn't allow the operation
792 public static synchronized void setURLStreamHandlerFactory(URLStreamHandlerFactory fac)
795 throw new Error("URLStreamHandlerFactory already set");
797 // Throw an exception if an extant security mgr precludes
798 // setting the factory.
799 SecurityManager s = System.getSecurityManager();
806 * Returns a String representing this URL. The String returned is
807 * created by calling the protocol handler's toExternalForm() method.
809 * @return A string for this URL
811 public String toExternalForm()
813 // Identical to toString().
814 return ph.toExternalForm(this);
818 * Returns a String representing this URL. Identical to toExternalForm().
819 * The value returned is created by the protocol handler's
820 * toExternalForm method. Overrides Object.toString()
822 * @return A string for this URL
824 public String toString()
826 // Identical to toExternalForm().
827 return ph.toExternalForm(this);
831 * This internal method is used in two different constructors to load
832 * a protocol handler for this URL.
834 * @param protocol The protocol to load a handler for
836 * @return A URLStreamHandler for this protocol, or null when not found.
838 private static synchronized URLStreamHandler getURLStreamHandler(String protocol)
840 URLStreamHandler ph = null;
842 // First, see if a protocol handler is in our cache.
845 if ((ph = (URLStreamHandler) ph_cache.get(protocol)) != null)
849 // If a non-default factory has been set, use it to find the protocol.
852 ph = factory.createURLStreamHandler(protocol);
854 else if (protocol.equals("core"))
856 ph = new gnu.java.net.protocol.core.Handler();
858 else if (protocol.equals("file"))
860 // This is an interesting case. It's tempting to think that we
861 // could call Class.forName ("gnu.java.net.protocol.file.Handler") to
862 // get the appropriate class. Unfortunately, if we do that the
863 // program will never terminate, because getURLStreamHandler is
864 // eventually called by Class.forName.
866 // Treating "file" as a special case is the minimum that will
867 // fix this problem. If other protocols are required in a
868 // statically linked application they will need to be handled in
869 // the same way as "file".
870 ph = new gnu.java.net.protocol.file.Handler();
873 // Non-default factory may have returned null or a factory wasn't set.
874 // Use the default search algorithm to find a handler for this protocol.
877 // Get the list of packages to check and append our default handler
878 // to it, along with the JDK specified default as a last resort.
879 // Except in very unusual environments the JDK specified one shouldn't
880 // ever be needed (or available).
881 String ph_search_path =
882 System.getProperty("java.protocol.handler.pkgs");
884 // Tack our default package on at the ends.
885 if (ph_search_path != null)
886 ph_search_path += "|" + DEFAULT_SEARCH_PATH;
888 ph_search_path = DEFAULT_SEARCH_PATH;
890 // Finally loop through our search path looking for a match.
891 StringTokenizer pkgPrefix = new StringTokenizer(ph_search_path, "|");
896 (pkgPrefix.nextToken() + "." + protocol + ".Handler");
900 Object obj = Class.forName(clsName).newInstance();
902 if (! (obj instanceof URLStreamHandler))
905 ph = (URLStreamHandler) obj;
909 // Can't instantiate; handler still null,
910 // go on to next element.
913 while ((! (ph instanceof URLStreamHandler))
914 && pkgPrefix.hasMoreTokens());
917 // Update the hashtable with the new protocol handler.
918 if (ph != null && cache_handlers)
919 if (ph instanceof URLStreamHandler)
920 ph_cache.put(protocol, ph);
927 private void readObject(ObjectInputStream ois)
928 throws IOException, ClassNotFoundException
930 ois.defaultReadObject();
931 this.ph = getURLStreamHandler(protocol);
933 throw new IOException("Handler for protocol " + protocol + " not found");
936 private void writeObject(ObjectOutputStream oos) throws IOException
938 oos.defaultWriteObject();