Programming Forums

Programming Forums (http://www.programmingforums.org/forumindex.php)
-   Python (http://www.programmingforums.org/forum43.html)
-   -   PySite - Web Development Framework (http://www.programmingforums.org/showthread.php?t=9028)

Sane Mar 24th, 2006 8:43 PM

PySite - Web Development Framework
 
It's not that I'm unhappy with CherryPy, it's just that when I decided to check out their source I was disappointed by the huge amount of code.

I didn't think it was necessary half the things they had there, so I decided I would try to make an optimum Web Development Framework for Python.

It was surprisingly easy. I made a simple script that parses packets from port 80 and sends back the requested function. This is about two hours work.

PySite makes it very easy to quickly create a dynamic website in Python. Just import PySite and apply it to a class. The return values of those functions will appear when you type 'localhost' in to your internet browser URL bar along with the appropriate function extention.

If anyone would like to fill in my comments, fix bad code, or tackle some of the missing items. Go ahead. If you think this is good enough, you guys could help me publish it out there on the net.

It actually works very well, it just needs all the missing stuff to reach v1.0. Best of all, it's only one file!

It's even got some stuff I prefer over CherryPy. ;)

See an example demo website below, along with PySite.py

PySite.py
:

import socket
import time

# limits
#  - does not support multiple directories
#  - does not support static content
#  - does not support multiple threads
#  - does not support backing up/server logs
#  - missing auto-reload
#  - missing "assume crashed"-reload
#  - missing cookies

class PySite:

    def __init__(self):

        # pre-assemble objects
        self._root    = []
        self._index  = 0
        self._505    = 0
        self._404    = 0

        # default dependants
        self._header  = dict()
       
    def config(self, cls, port=80, backup=3600, log=3600*24, output=True):

        # configuration
        self._port    = port    # port to run server on
        self._backup  = backup  # seconds between each backup
        self._log    = log      # seconds between each new log - 0 for no logs
        self._output  = output  # show server logs on the console

        # get main class
        self._cls    = cls()

        # pass through thread specs
        # case sensitive page option

    def do_logs(self, page, params, head, to=False):
        t = time.ctime()
        ip = head['IP']
        if not to:
            try:
                ref = head['Referer']
            except KeyError: ref = ''

            print t, 'RECV:', ip, page, params, ref
        else:
            print t, 'SENT:', ip, page

    def start(self):

        self._sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
        self._sock.bind(('', self._port))
        self._sock.listen(1)

        # log initiation

        # thread priority
        # thread bail time
        # thread pool

        # thread before the sock accept and close

        while 1:
           
            conn, addr = self._sock.accept()
            # show request in logs
           
            data = conn.recv(1024)
            page, params, head  = self.strp_data( data )
            head['IP'] = addr[0] # add IP to accessable header content
            self._header = head
            self.do_logs(page, params, head)

            nparams = []
            for key in params.keys():
                nparams += ['%s=%s'%(key, params[key])]
            nparams = ','.join(nparams) # can't just do **params because of exec

            try:
                if page in self._root:
                    exec "res = self._cls.%s(%s)"%(page, nparams)
                elif not page:
                    exec "res = self._cls.%s(%s)"%(self._index, nparams)
                else:
                    exec "res = self._cls.%s(%s)"%(self._404, nparams)

            except:
                exec "res = self._cls.%s()"%(self._505)  # pass options to 505 function

            conn.send(res)
            conn.close()

            self.do_logs(page, params, head, to=True)

    def request(self):
        # figure out how to work properly for multiple threads
        return self._header

    def strp_data(self, data):

        data = data.replace('\r', '').split('\n')

        # get requested http page
        head = data[0].split(' ')[1][1:]

        params = dict()
        if '?' in head: # get data
            p = head.split('?')
            page = p[0]
            p = p[1]
            for le in p.split('&'):
                lep = le.split('=')
                params[lep[0]] = lep[1]
        else:
            page = head
        # make catch non-formatted url !

        res = dict()
        # ['Accept-Language', 'Accept-Encoding', 'Accept', 'User-Agent', 'Host', 'Referer', 'Cache-Control', 'Content-Length', 'Connection']
        for r in range(1, len(data)):
            datar = data[r]

            # post data
            if '=' in datar: # can head info have '='?
                rr = datar.split('=', 1)
                params[rr[0]] = rr[1]

            # header info
            elif ': ' in datar: 
                rr = datar.split(': ', 1)
                res[rr[0]] = rr[1]               

        return page, params, res

    # expose web page to the internet
    def Expose(self, func):
        self._root += [func.func_name]
        return func
   
    # page accessed if url left blank
    def Index(self, func):
        self._index = func.func_name
        self.Expose(func)
        return func
   
    # page accessed if a 505 (internal error) occurs
    def Error(self, func):
        self._505 = func.func_name
        self.Expose(func)
        return func

    # page accessed if a 404 (missing page) occurs
    def Default(self, func):
        self._404 = func.func_name
        self.Expose(func)
        return func




my_website.py

:

from PySite import PySite
from time import ctime

PySite = PySite()

class my_page:

    @PySite.Index
    def index(self):
        return """
<html>

<body>
<a href='/'>Home</a><br />
<a href='/page1'>What is the time?</a><br />
<a href='/page2'>Hidden page</a><br />
<a href='/stats'>What is my ip?</a><br />
<a href='/make_error'>Error</a><br />
<a href='/na'>Missing Page</a><br />

<form action='/show_form'
      method='get'>
  GET: <input type='text' name='text' />
  <input type='submit' value='Submit' />
</form>

<form action='/show_form'
      method='post'>
  POST: <input type='text' name='text' />
  <input type='submit' value='Submit' />
</form>

</body>

</html>
"""

    @PySite.Error
    def error(self):
        return "OH NO! 505!"

    @PySite.Default
    def default(self, *args, **kwargs):
        return "OH NO! 404! %s"%str([args, kwargs])

    @PySite.Expose
    def page1(self):
        return ctime()

    def page2(self):
        return "You can't see me!"

    @PySite.Expose
    def make_error(self):
        return nonexistant_variable

    @PySite.Expose
    def stats(self):
        r = PySite.request()
        k = ''
        for key in r.keys():
            k += ' [%s : %s] '%(key, r[key])
        # dir(r) for a full list of available info
        return k

    @PySite.Expose
    def show_form(self, text=''):
        return 'Your post: %s'%text

PySite.config(my_page)
PySite.start()


Arevos Mar 25th, 2006 5:26 AM

Part of the advantage, and perhaps a lot of the problem, with Python web development, is that there are a lot of web frameworks to choose from. You might be interested in googling on WSGI, which is an attempt to provide a standard to build these frameworks on top of.

Returning to your program, it's impressive, but I think you're reinventing the wheel a little. Why use raw sockets, when the standard Python library includes a BaseHTTPServer module for you to use? The advantages to using BaseHTTPServer are twofold: firstly, it's easier, and secondly you know that it adheres to HTTP specifications, which can be something of a minefield to do manually.

:

from BaseHTTPServer import *
pages = {}
def expose(function):
        pages["/" + function.__name__] = function
        if function.__name__ == 'index':
                pages["/"] = function
        return function

class HTTPHandler(BaseHTTPRequestHandler):
        def do_GET(self):
                page_contents = pages[self.path]()
                self.send_response(200)
                self.send_header("Content-type", "text/html")
                self.wfile.write(page_contents)
@expose
def index():
        return """<html>
                <head><title>Test</title><head>
                <body>Hello World</body>
                </html>"""

if __name__ == '__main__':
        httpd = HTTPServer(('', 8000), HTTPHandler)
        httpd.serve_forever()

I believe CherryPy uses BaseHTTPServer, too.

Sane Mar 25th, 2006 10:21 AM

Well I knew about the BaseHTTPServer but I figured "hand-rolled" functions are quite faster. Maybe not to as much of a degree here, but couldn't it make a difference?

And I still need to figure out the proper formatting for returning the html, because on FireFox you just see plain source. :(

Arevos Mar 25th, 2006 11:07 AM

Quote:

Originally Posted by Sane
Well I knew about the BaseHTTPServer but I figured "hand-rolled" functions are quite faster. Maybe not to as much of a degree here, but couldn't it make a difference?

I'm not sure it will, not to any significant degree. Both your system and BaseHTTPServer are programmed in Python, so there's unlikely to be much efficiency advantage between them.

One advantage in using BaseHTTPServer is that it's had a lot of use. People all over the world have coded with it, and thus there are likely to be fewer bugs in the implementation. Also, since BaseHTTPServer is based upon SocketServer.TCPServer, it also supports threading and forking mixins.

Another, more obvious advantage, is that it does a lot of work for you.

I can understand wanting to use your own systems. It wasn't so long ago that I shunned libraries and wanted to write my own implementations of all sorts of systems. It was only at University that discussions with my peers lead me to realise that a lot of the time, reinventing the wheel isn't necessary. Indeed, the time spent reimplementing a system could be spent inventing something entirely new, which may be much more interesting and satisfying.

However, I think this is one of those things that a programmer has to come to realise on their own, and until then, my words won't hold much weight.

Quote:

Originally Posted by Sane
And I still need to figure out the proper formatting for returning the html, because on FireFox you just see plain source. :(

Try adding the header: "Content-type: text/html"

Sane Mar 25th, 2006 1:25 PM

Well even if it is much more beneficial to use these libraries, I still feel like I'm learning a lot more doing it from scratch. I'm still quite new to programming. Hell, I didn't even understand sockets until yesterday. So I feel like this is the only way I will end up understanding these things clearly, and in the end that may be much more helpful.

Sane Mar 25th, 2006 6:52 PM

I tried adding the content-type: text/html to the top of the socket return literal, but I guess that's not the proper way to format header info.

:

import socket
import time

# limits
#  - does not support multiple directories
#  - does not support static content
#  - does not support multiple threads
#  - does not support backing up/server logs
#  - missing auto-reload
#  - missing "assume crashed"-reload
#  - missing cookies

class PySite:

    def __init__(self):

        # pre-assemble objects
        self._root    = []
        self._index  = 0
        self._505    = 0
        self._404    = 0

        # default dependants
        self._header  = dict()
       
    def config(self, cls, port=80, backup=3600, log=3600*24, output=True):

        # configuration
        self._port    = port    # port to run server on
        self._backup  = backup  # seconds between each backup
        self._log    = log      # seconds between each new log - 0 for no logs
        self._output  = output  # show server logs on the console

        # get main class
        self._cls    = cls()

        # pass through thread specs
        # case sensitive page option
        # different levels of logging

        self.do_logs('Server Loaded and Configured\n')

    def do_logs(self, text):
        print text

    def start(self):

        self.do_logs('Server Started...')

        self._sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
        self.do_logs(' - Initiated Socket Object')

        self._sock.bind(('', self._port))
        self.do_logs(' - Bound Port to Socket')

        self._sock.listen(1)
        self.do_logs('Listening on Port [%s]\n'%self._port)

        # thread priority
        # thread bail time
        # thread pool

        # thread before the sock accept and close

        while 1:
           
            conn, addr = self._sock.accept()
            # show request in logs
           
            data = conn.recv(1024)
            page, params, head  = self.strp_data( data )

            head['IP'] = addr[0] # add IP to accessable header content
            self._header = head  # remember header content

            self.do_logs('[RECV <- %s] [%s] %s %s'%(addr[0], time.ctime(), page, params))

            # make request
            res = self.run_func(page, params) # try and except for backup error

            # send result
            conn.send(res)
            conn.close()

            self.do_logs('[SEND -> %s] [%s] %s %s'%(addr[0], time.ctime(), page, params))

    def run_func(self, page, params):
        nparams = []
        for key in params.keys():
            nparams += ['%s=%s'%(key, params[key])]
        nparams = ','.join(nparams) # can't just do **params because of exec

        try:
            if page in self._root:
                exec "res = self._cls.%s(%s)"%(page, nparams)
            elif not page:
                if not self._index:
                    res = '' # execute alternative self._index
                else:
                    exec "res = self._cls.%s(%s)"%(self._index, nparams)
            else:
                if not self._404:
                    res = '' # execute alternative self._404
                else:
                    exec "res = self._cls.%s(%s)"%(self._404, nparams)

        except:
            if not self._505:
                res = '' # execute alternative self._505
            else:
                exec "res = self._cls.%s()"%(self._505)  # pass options to 505 function

        return res # "Content-type: text/html\n\n" + res

    def request(self):
        # figure out how to work properly for multiple threads
        return self._header

    def strp_data(self, data):

        data = data.replace('\r', '').split('\n')

        # get requested http page
        head = data[0].split(' ')[1][1:]

        params = dict()
        if '?' in head: # get data
            p = head.split('?', 1)
            page = p[0]
            p = p[1]
            for le in p.split('&'):
                if '=' in le:
                    lep = le.split('=', 1)
                    params[lep[0]] = lep[1]
        else:
            page = head

        res = dict()
        ptime = False
        # ['Accept-Language', 'Accept-Encoding', 'Accept', 'User-Agent', 'Host', 'Referer', 'Cache-Control', 'Content-Length', 'Connection']
        for r in range(1, len(data)):
            datar = data[r]

            # post data
            if '=' in datar and ptime:
                rr = datar.split('=', 1)
                params[rr[0]] = rr[1]

            # header info
            elif ': ' in datar: 
                rr = datar.split(': ', 1)
                res[rr[0]] = rr[1]

            elif not datar:
                ptime = True

        return page, params, res

    # expose web page to the internet
    def Expose(self, func):
        self._root += [func.func_name]
        return func
   
    # page accessed if url left blank
    def Index(self, func):
        self._index = func.func_name
        self.Expose(func)
        return func
   
    # page accessed if a 505 (internal error) occurs
    def Error(self, func):
        self._505 = func.func_name
        self.Expose(func)
        return func

    # page accessed if a 404 (missing page) occurs
    def Default(self, func):
        self._404 = func.func_name
        self.Expose(func)
        return func


It's the commented part with content-type: text/html.

Arevos Mar 25th, 2006 7:12 PM

IIRC, HTTP uses "\r\n"s instead of "\n"s.

Sane Mar 26th, 2006 9:52 AM

Didn't help. Still shows up in the source. I think the header has to be sent as a completely different section or something. :S

Arevos Mar 26th, 2006 12:00 PM

The easiest way to check out what it is that your program's doing wrong is to send a HTTP GET request to your webserver, record the output, then do the same for a 'proper' webserver.

Sane Mar 26th, 2006 3:26 PM

Okay, I sent an http request to my webserver with a raw socket, and got back

:

HTTP/1.1 200 OK

Date: <insert time> GMT

Server: <server name>/<server version>

Content-Length: <len(page)>

Content-Type: text/html

Connection: close


So then I sent that back to my webserver in a socket again, and the second time I got back the actual page (instead of the header).

So I assume then that the header info should be returned before you get the actual page.

I tried this method on PySite, first sending the client an identical header, then waiting for that header info to be sent back, then sending the site in return.

So basically my server is like this:
> Open socket
>> Wait for HTTP Request
>>> Send header info
>> Wait to recieve header info back
>> Send webpage
> Close socket

But the problem is the header info is never sent back... Here's my source:

:

        while 1:
           
            conn, addr = self._sock.accept()
            # show request in logs
           
            data = conn.recv(1024)
            page, params, head  = self.strp_data( data )

            head['IP'] = addr[0] # add IP to accessable header content
            self._header = head  # remember header content

            self.do_logs('[RECV <- %s] [%s] %s %s'%(addr[0], time.ctime(), page, params))

            # make request
            head, site = self.run_func(page, params) # try and except for backup error

            conn.send(head)
           
            data = conn.recv(1024)

            conn.send(site)
            conn.close()

            self.do_logs('[SEND -> %s] [%s] %s %s'%(addr[0], time.ctime(), page, params))

    def run_func(self, page, params):
        nparams = []
        for key in params.keys():
            nparams += ['%s=%s'%(key, params[key])]
        nparams = ','.join(nparams) # can't just do **params because of exec

        try:
            if page in self._root:
                exec "res = self._cls.%s(%s)"%(page, nparams)
            elif not page:
                if not self._index:
                    res = '' # execute alternative self._index
                else:
                    exec "res = self._cls.%s(%s)"%(self._index, nparams)
            else:
                if not self._404:
                    res = '' # execute alternative self._404
                else:
                    exec "res = self._cls.%s(%s)"%(self._404, nparams)

        except:
            if not self._505:
                res = '' # execute alternative self._505
            else:
                exec "res = self._cls.%s()"%(self._505)  # pass options to 505 function

        return """HTTP/1.1 200 OK

Date: %s GMT

Server: PySite/1.0.0

Content-Length: %s

Content-Type: text/html

Connection: close




"""%(time.ctime(), len(res)), res


I've also tried starting a new socket object between the header request and the page request, but that didn't help.

By the way, that double spacing is how these forums handle \r\n.


All times are GMT -5. The time now is 7:35 PM.

Powered by vBulletin® Version 3.7.0, Copyright ©2000 - 2008, Jelsoft Enterprises Ltd.
Copyright ©2007 DaniWeb® LLC