ref: 2dabff3c93c430a7e5578327227e8b6789c31ccd
dir: /src/utils/cst_url.c/
/*************************************************************************/
/*                                                                       */
/*                  Language Technologies Institute                      */
/*                     Carnegie Mellon University                        */
/*                         Copyright (c) 2011                            */
/*                        All Rights Reserved.                           */
/*                                                                       */
/*  Permission is hereby granted, free of charge, to use and distribute  */
/*  this software and its documentation without restriction, including   */
/*  without limitation the rights to use, copy, modify, merge, publish,  */
/*  distribute, sublicense, and/or sell copies of this work, and to      */
/*  permit persons to whom this work is furnished to do so, subject to   */
/*  the following conditions:                                            */
/*   1. The code must retain the above copyright notice, this list of    */
/*      conditions and the following disclaimer.                         */
/*   2. Any modifications must be clearly marked as such.                */
/*   3. Original authors' names are not deleted.                         */
/*   4. The authors' names are not used to endorse or promote products   */
/*      derived from this software without specific prior written        */
/*      permission.                                                      */
/*                                                                       */
/*  CARNEGIE MELLON UNIVERSITY AND THE CONTRIBUTORS TO THIS WORK         */
/*  DISCLAIM ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, INCLUDING      */
/*  ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO EVENT   */
/*  SHALL CARNEGIE MELLON UNIVERSITY NOR THE CONTRIBUTORS BE LIABLE      */
/*  FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES    */
/*  WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN   */
/*  AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION,          */
/*  ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF       */
/*  THIS SOFTWARE.                                                       */
/*                                                                       */
/*************************************************************************/
/*             Author:  Alan W Black (awb@cs.cmu.edu)                    */
/*               Date:  November 2011                                    */
/*************************************************************************/
/*                                                                       */
/*  Support to access (some forms of) url                                */
/*      http:  and file:                                                 */
/*  Should support libcurl if available -- but a simple form if not      */
/*                                                                       */
/*  Only support http: if sockets are available                          */
/*                                                                       */
/*************************************************************************/
#include "cst_math.h"
#include "cst_file.h"
#include "cst_string.h"
#include "cst_tokenstream.h"
#include "cst_socket.h"
#ifndef CST_NO_SOCKETS
#ifndef _MSC_VER
#include <stdlib.h>
#include <unistd.h>
#else
#include <io.h>
#include <WinSock.h>
#endif
#endif
int cst_urlp(const char *url)
{
    /* Return 1 if url is a url, 0 otherwise */
    /* This is decided by the initial substring being "http:" or "file:" */
    if ((cst_strlen(url) > 4) &&
        (cst_streqn("http:",url,5) ||
         cst_streqn("file:",url,5)))
        return TRUE;
    else
        return FALSE;
}
cst_file cst_url_open(const char *url)
{
    /* Always opens it for reading */
    cst_tokenstream *urlts;
    const cst_string *protocol;
    int port;
    cst_string *host;
    int fd;
    char *url_request;
    char *path;
    cst_file ofd;
    int state,n;
    char c;
    urlts = ts_open_string(url, "", ":/", "", "");
    protocol = ts_get(urlts);
    if (cst_streq(protocol,"http"))
    {
#ifdef CST_NO_SOCKETS
        ts_close(urlts);
        return NULL;
#else
        if (!cst_streq(ts_get(urlts),":") ||
            !cst_streq(ts_get(urlts),"/") ||
            !cst_streq(ts_get(urlts),"/"))
        {
            ts_close(urlts);
            return NULL;
        }
        host = cst_strdup(ts_get(urlts));
        if (cst_streq(ts_get(urlts),":"))
            port = (int)cst_atof(ts_get(urlts));
        else
            port = 80;
        /* Open port to web server */
        fd = cst_socket_open(host,port);
        if (fd < 0)
        {
            cst_free(host);
            ts_close(urlts);
            return NULL;
        }
        url_request = cst_alloc(char,cst_strlen(url)+17);
        cst_sprintf(url_request,"GET %s HTTP/1.2\n\n",url);
        n = write(fd,url_request,cst_strlen(url_request));
        cst_free(url_request);
        /* Skip http header -- until \n\n */
        state=0;
        while (state != 4)
        {
            n=read(fd,&c,1);
            if (n == 0)
            {   /* eof or link gone down */
                cst_free(host);
                ts_close(urlts);
                return NULL;
            }
            if ((state == 0) && (c == '\r'))
                state=1;
            else if ((state == 1) && (c == '\n'))
                state=2;
            else if ((state == 2) && (c == '\r'))
                state=3;
            else if ((state == 3) && (c == '\n'))
                state=4;
            /* Not sure you can get no CRs in the stream */
            else if ((state == 0) && (c == '\n'))
                state=2;
            else if ((state == 2) && (c == '\n'))
                state=4;
            else
                state = 0;
        }
        ofd = fdopen(fd,"rb");
        ts_close(urlts);
        cst_free(host);
        return ofd;
#endif
    }
    else if (cst_streq(protocol,"file"))
    {
        if (!cst_streq(ts_get(urlts),":") ||
            !cst_streq(ts_get(urlts),"/") ||
            !cst_streq(ts_get(urlts),"/"))
        {
            ts_close(urlts);
            return NULL;
        }
        path = cst_strdup(&urlts->string_buffer[urlts->file_pos-1]);
        /* printf("awb_debug fileurl %s\n",path); */
        ofd = cst_fopen(path,CST_OPEN_READ);
        ts_close(urlts);
        cst_free(path);
        
        return ofd;
    }
    else
    {   /* Unsupported protocol */
        return NULL;
    }
}