// This file defines a class that offers a simple interface to Web pages for
// Java programmers.

// History:
//
//  December 2002 -- Made more accepting of what content types it will
//    read, in particular should now accept any sort of text.
//
//  October 2002 -- This version created by Doug Baldwin, from an older
//    version that had a tendency to not actually read whole pages, as
//    well as some features specific to course exercises done with the
//    class. This version hopefully fixes the bug, and makes the class
//    more general. Also adds javadoc documentation.
//
//  Oct. 2001 -- Original version created by Doug Baldwin.




package geneseo.cs.sc;




import java.net.*;
import java.io.*;




/**
 * Represents a Web page, i.e., a long string of text that (presumably)
 * lives on a Web server somewhere and is identified by a URL. This class
 * provides a much simpler, but less functional, interface to Web pages
 * than the standard Java library classes do.
 * <p>
 * The page is fetched once, when the object is constructed; afterwards
 * the object simply hands out the text it read. Failures are reported on
 * <code>System.err</code> rather than through exceptions, so clients only
 * need to check for a <code>null</code> result from {@link #getText()}.
 */
 
public class WebPage {




    // Web page objects record the Java URL object for their page, and the contents
    // of that page. "myURL" is null if the URL string couldn't be parsed; "content"
    // is null if the page's text couldn't be read for any reason.
    
    private URL myURL;
    private String content;


    // Number of characters requested from the page per read call.
    
    private static final int CHUNK_SIZE = 4096;




    /**
     * Initialize a Web page. For example
     * <pre><code>
     *   WebPage page = new WebPage( "http://www.somewhere.com" );
     * </code></pre>
     * Only <code>http</code>, <code>https</code>, and <code>file</code> URLs
     * whose content type is some kind of text are read. If the content type
     * names a character set that this Java installation supports, the page
     * is decoded with it; otherwise the platform default is used.
     * @param url The URL for the page this object should represent.
     */
     
    // This works by creating a Java URL object for this page, and then reading
    // its content. If the content is text, then it is saved in a member variable
    // for later. I also save the URL object for this page, if it could be created
    // at all. I catch exceptions that might be thrown by creating or reading the
    // page in this method, printing an error message, so that clients know something
    // has gone wrong, but needn't (and can't) deal with exceptions themselves.
    
    public WebPage( String url ) {
    
        myURL = null;
        content = null;                     // Will stay null if read fails
            
        try {
        
            myURL = new URL( url );
            
            String protocol = myURL.getProtocol();


            // Only process URLs whose protocol implies that they might correspond to
            // text Web pages:
                        
            if (    protocol.equalsIgnoreCase("http")
                 || protocol.equalsIgnoreCase("https")
                 || protocol.equalsIgnoreCase("file")  ) {
                 
                URLConnection connection = myURL.openConnection();
                String contentType = connection.getContentType();
                
                if (  isText( contentType )  ) {
                
                    // Close the stream even if reading fails part way through,
                    // so that a broken transfer doesn't leak the connection.
                    
                    InputStream pageStream = connection.getInputStream();
                    try {
                        content = readAll( openReader( pageStream, contentType ) );
                    }
                    finally {
                        pageStream.close();
                    }
                }
            }
        }
        catch ( MalformedURLException error ) {
            System.err.println( "'" + url + "' isn't a valid Web page: " + error );
            myURL = null;
            content = null;
        }
        catch ( IOException error ) {
            System.err.println( "Couldn't read Web page '" + url + "': " + error );
            content = null;
        }
    }
    
    
    
    
    // Decide whether a content type describes text. Media type names are
    // case-insensitive, so "Text/HTML" counts just as "text/html" does.
    
    private static boolean isText( String contentType ) {
        final String prefix = "text/";
        return contentType != null
               &&  contentType.regionMatches( true, 0, prefix, 0, prefix.length() );
    }
    
    
    
    
    // Extract the value of the "charset" parameter from a content type such as
    // "text/html; charset=UTF-8". Returns null if there is no such parameter
    // or its value is empty.
    
    private static String charsetNameFrom( String contentType ) {
    
        final String key = "charset=";
        
        int start = contentType.indexOf( ';' );
        while ( start != -1 ) {
        
            int end = contentType.indexOf( ';', start + 1 );
            String parameter = ( end == -1 )  ?  contentType.substring( start + 1 )
                                              :  contentType.substring( start + 1, end );
            parameter = parameter.trim();
            
            if (  parameter.regionMatches( true, 0, key, 0, key.length() )  ) {
            
                String name = parameter.substring( key.length() ).trim();
                
                // The value may legally be written as a quoted string.
                if (    name.length() >= 2
                     && name.charAt( 0 ) == '"'
                     && name.charAt( name.length() - 1 ) == '"'  ) {
                    name = name.substring( 1, name.length() - 1 ).trim();
                }
                
                return ( name.length() > 0 )  ?  name  :  null;
            }
            
            start = end;
        }
        
        return null;
    }
    
    
    
    
    // Wrap a page's byte stream in a reader that decodes it with the character
    // set named in the content type, falling back to the platform default when
    // none is named or the named one isn't available.
    
    private static Reader openReader( InputStream stream, String contentType ) {
    
        String charsetName = charsetNameFrom( contentType );
        
        if ( charsetName != null ) {
            try {
                return new InputStreamReader( stream, charsetName );
            }
            catch ( UnsupportedEncodingException ignored ) {
                // Unknown or malformed charset name: nothing has been read from
                // the stream yet, so it is safe to fall back to the default.
            }
        }
        
        return new InputStreamReader( stream );
    }
    
    
    
    
    // Read everything a reader has to offer and return it as one string. The
    // caller remains responsible for closing the underlying stream.
    
    private static String readAll( Reader pageReader ) throws IOException {
    
        StringBuffer contentBuffer = new StringBuffer();
        char[] chunk = new char[ CHUNK_SIZE ];
        
        int count = pageReader.read( chunk );       // Characters read, -1 at end
        while ( count != -1 ) {
            contentBuffer.append( chunk, 0, count );
            count = pageReader.read( chunk );
        }
        
        return contentBuffer.toString();
    }
    
    
    

    /**
     * Return the text stored in a Web page, as a long string. For example
     * <pre><code>
     *   String pageText = somePage.getText();
     * </code></pre>
     * @return The contents of the page, or <code>null</code> if the
     *   page's text couldn't be obtained for any reason: the URL was
     *   malformed, used an unsupported protocol, named something that
     *   isn't text, or couldn't be read.
     */
        
    // This simply returns the "content" member variable, which is either the
    // text of the page or null if anything went wrong in initialization.
    
    public String getText() {
        return content;
    }
    
}