// This file defines a class that offers a simple interface to Web pages for // Java programmers. // History: // // December 2002 -- Made more accepting of what content types it will // read, in particular should now accept any sort of text. // // October 2002 -- This version created by Doug Baldwin, from an older // version that had a tendency to not actually read whole pages, as // well as some features specific to course exercises done with the // class. This version hopefully fixes the bug, and makes the class // more general. Also adds javadoc documentation. // // Oct. 2001 -- Original version created by Doug Baldwin. package geneseo.cs.sc; import java.net.*; import java.io.*; /** * Represents a Web page, i.e., a long string of text that (presumably) * lives on a Web server somewhere and is identified by a URL. This class * provides a much simpler, but less functional, interface to Web pages * than the standard Java library classes do. */ public class WebPage { // Web page objects record the Java URL object for their page, and the contents // of that page. private URL myURL; private String content; /** * Initialize a Web page. For example *

     *   WebPage page = new WebPage( "http://www.somewhere.com" );
     * 
* @param url The URL for the page this object should represent. */ // This works by creating a Java URL object for this page, and then reading // its content. If the content is text, then it is saved in a member variable // for later. I also save the URL object for this page, if it could be created // at all. I catch exceptions that might be thrown by creating or reading the // page in this method, printing an error message, so that clients know something // has gone wrong, but needn't (and can't) deal with exceptions themselves. public WebPage( String url ) { try { myURL = new URL( url ); content = null; // Will stay null if read fails String protocol = myURL.getProtocol(); // Only process URLs whose protocol implies that they might correspond to // text Web pages: if ( protocol.equalsIgnoreCase("http") || protocol.equalsIgnoreCase("file") ) { URLConnection connection = myURL.openConnection(); String contentType = connection.getContentType(); if ( contentType != null && contentType.startsWith("text/") ) { // Get a reader to this page and read its contents. // Read contents line by line, appending each line to // a string buffer. At the end, save the buffer as a string // in the "content" member variable. StringBuffer contentBuffer = new StringBuffer(); BufferedReader pageReader = new BufferedReader( new InputStreamReader( connection.getInputStream() )); int c = pageReader.read(); // A character from the page while ( c != -1 ) { contentBuffer.append( (char)c ); c = pageReader.read(); } content = contentBuffer.toString(); pageReader.close(); } } } catch ( MalformedURLException error ) { System.err.println( "'" + url + "' isn't a valid Web page: " + error ); myURL = null; content = null; } catch ( IOException error ) { System.err.println( "Couldn't read Web page '" + url + "': " + error ); content = null; } } /** * Return the text stored in a Web page, as a long string. For example *

     *   String pageText = somePage.getText();
     * 
* @return The contents of the page, or null if the * page couldn't be fetched from its server for any reason. */ // This simply returns the "content" member variable, which is either the // text of the page or null if anything went wrong in initialization. public String getText() { return content; } }