<?xml version='1.0' encoding='UTF-8'?><?xml-stylesheet href="http://www.blogger.com/styles/atom.css" type="text/css"?><feed xmlns='http://www.w3.org/2005/Atom' xmlns:openSearch='http://a9.com/-/spec/opensearchrss/1.0/' xmlns:georss='http://www.georss.org/georss' xmlns:gd='http://schemas.google.com/g/2005' xmlns:thr='http://purl.org/syndication/thread/1.0'><id>tag:blogger.com,1999:blog-33306823</id><updated>2012-01-29T21:27:06.861-08:00</updated><category term='gpirate'/><category term='couchdb'/><category term='lighttpd'/><category term='postgres'/><category term='admin'/><category term='erlang'/><category term='redis'/><category term='torrents'/><category term='scaling'/><category term='dictionary python'/><category term='threading'/><category term='coroutines'/><category term='mod_fcgid'/><category term='webserver'/><category term='python api'/><category term='notifications'/><category term='css'/><category term='sql javascript berkeley'/><category term='ios'/><category term='python'/><category term='inbox'/><category term='webpy'/><category term='postgres path postgis'/><category term='windows'/><category term='email'/><category term='python linuxvirtualserver'/><category term='piratebay'/><category term='vim'/><category term='code'/><category term='cron'/><category term='JSON'/><category term='apache'/><category term='linux'/><category term='python datetime'/><category term='facebook'/><category term='tech'/><category term='amazon s3 ipython bitbucket 7zip'/><category term='mysql'/><category term='cheetah'/><category term='truthbox'/><category term='example'/><category term='amazon s3'/><category term='flash ffmpeg linux video'/><category term='textmate'/><category term='cheetah python gotchas'/><category term='postgis'/><category term='python autocomplete'/><category term='mongodb'/><category term='jinja2'/><category term='gaim dbus'/><category term='anonymous'/><category term='yum fedora'/><category term='bash linux'/><category term='getframe'/><category 
term='unicode python'/><category term='google search'/><category term='memcached'/><category term='openpoker'/><category term='python regexp'/><category term='locals'/><category term='python sets'/><category term='svn'/><title type='text'>pyLabs!</title><subtitle type='html'></subtitle><link rel='http://schemas.google.com/g/2005#feed' type='application/atom+xml' href='http://pylab.blogspot.com/feeds/posts/default'/><link rel='self' type='application/atom+xml' href='http://www.blogger.com/feeds/33306823/posts/default?max-results=100'/><link rel='alternate' type='text/html' href='http://pylab.blogspot.com/'/><link rel='hub' href='http://pubsubhubbub.appspot.com/'/><link rel='next' type='application/atom+xml' href='http://www.blogger.com/feeds/33306823/posts/default?start-index=101&amp;max-results=100'/><author><name>NextGenSearch</name><email>noreply@blogger.com</email><gd:image rel='http://schemas.google.com/g/2005#thumbnail' width='16' height='16' src='http://img2.blogblog.com/img/b16-rounded.gif'/></author><generator version='7.00' uri='http://www.blogger.com'>Blogger</generator><openSearch:totalResults>168</openSearch:totalResults><openSearch:startIndex>1</openSearch:startIndex><openSearch:itemsPerPage>100</openSearch:itemsPerPage><entry><id>tag:blogger.com,1999:blog-33306823.post-3806933329005258355</id><published>2011-03-05T15:58:00.001-08:00</published><updated>2011-03-05T15:58:59.217-08:00</updated><category scheme='http://www.blogger.com/atom/ns#' term='cheetah'/><category scheme='http://www.blogger.com/atom/ns#' term='textmate'/><title type='text'>Cheetah textmate bundle</title><content type='html'>&lt;div class="blogger-post-footer"&gt;&lt;img width='1' height='1' src='https://blogger.googleusercontent.com/tracker/33306823-3806933329005258355?l=pylab.blogspot.com' alt='' /&gt;&lt;/div&gt;</content><link rel='related' href='http://groups.google.com/group/ipython-tmbundle/attach/d5b148ec5d1b775a/Cheetah.tmbundle.zip?part=4' title='Cheetah textmate 
bundle'/><link rel='replies' type='application/atom+xml' href='http://pylab.blogspot.com/feeds/3806933329005258355/comments/default' title='Post Comments'/><link rel='replies' type='text/html' href='http://www.blogger.com/comment.g?blogID=33306823&amp;postID=3806933329005258355' title='0 Comments'/><link rel='edit' type='application/atom+xml' href='http://www.blogger.com/feeds/33306823/posts/default/3806933329005258355'/><link rel='self' type='application/atom+xml' href='http://www.blogger.com/feeds/33306823/posts/default/3806933329005258355'/><link rel='alternate' type='text/html' href='http://pylab.blogspot.com/2011/03/cheetah-textmate-bundle.html' title='Cheetah textmate bundle'/><author><name>NextGenSearch</name><email>noreply@blogger.com</email><gd:image rel='http://schemas.google.com/g/2005#thumbnail' width='16' height='16' src='http://img2.blogblog.com/img/b16-rounded.gif'/></author><thr:total>0</thr:total></entry><entry><id>tag:blogger.com,1999:blog-33306823.post-618059263973410478</id><published>2011-03-04T11:25:00.001-08:00</published><updated>2011-03-04T11:30:59.579-08:00</updated><category scheme='http://www.blogger.com/atom/ns#' term='jinja2'/><category scheme='http://www.blogger.com/atom/ns#' term='ios'/><category scheme='http://www.blogger.com/atom/ns#' term='redis'/><category scheme='http://www.blogger.com/atom/ns#' term='mongodb'/><category scheme='http://www.blogger.com/atom/ns#' term='textmate'/><title type='text'>Stack 2.0 - Mongodb, node.js, redis, ios</title><content type='html'>Node.js seems to be the viable backend for scalable chat and game services&lt;div&gt;&lt;br /&gt;&lt;/div&gt;&lt;div&gt;Mongodb seems to be the best tool for storing information on any backend game or mobile apps&lt;/div&gt;&lt;div&gt;&lt;br /&gt;&lt;/div&gt;&lt;div&gt;When you compare this to what was available in 2005 , postgres/mysql and twisted/ejabberd - this is definitely a quantum improvement and 10gen definitely has made Mongodb the defacto backend replacing 
cassandra and many other backends&lt;/div&gt;&lt;div&gt;&lt;br /&gt;&lt;/div&gt;&lt;div&gt;I wonder where redis fits into this ecosystem, i might presume it would be good as a caching option?&lt;/div&gt;&lt;div&gt;&lt;br /&gt;&lt;/div&gt;&lt;div&gt;Jinja2 is good for templating, usage of jinja2 is enhanced by snippets in textmate&lt;/div&gt;&lt;div&gt;&lt;br /&gt;&lt;/div&gt;&lt;div&gt;iOS is the winner among the smart phone oses&lt;/div&gt;&lt;div class="blogger-post-footer"&gt;&lt;img width='1' height='1' src='https://blogger.googleusercontent.com/tracker/33306823-618059263973410478?l=pylab.blogspot.com' alt='' /&gt;&lt;/div&gt;</content><link rel='related' href='http://fotoroll.com/mongodb' title='Stack 2.0 - Mongodb, node.js, redis, ios'/><link rel='replies' type='application/atom+xml' href='http://pylab.blogspot.com/feeds/618059263973410478/comments/default' title='Post Comments'/><link rel='replies' type='text/html' href='http://www.blogger.com/comment.g?blogID=33306823&amp;postID=618059263973410478' title='0 Comments'/><link rel='edit' type='application/atom+xml' href='http://www.blogger.com/feeds/33306823/posts/default/618059263973410478'/><link rel='self' type='application/atom+xml' href='http://www.blogger.com/feeds/33306823/posts/default/618059263973410478'/><link rel='alternate' type='text/html' href='http://pylab.blogspot.com/2011/03/stack-20.html' title='Stack 2.0 - Mongodb, node.js, redis, ios'/><author><name>NextGenSearch</name><email>noreply@blogger.com</email><gd:image rel='http://schemas.google.com/g/2005#thumbnail' width='16' height='16' src='http://img2.blogblog.com/img/b16-rounded.gif'/></author><thr:total>0</thr:total></entry><entry><id>tag:blogger.com,1999:blog-33306823.post-1606376029434664300</id><published>2010-03-19T21:41:00.000-07:00</published><updated>2010-03-19T21:48:15.953-07:00</updated><category scheme='http://www.blogger.com/atom/ns#' term='webpy'/><category scheme='http://www.blogger.com/atom/ns#' term='mod_fcgid'/><category 
scheme='http://www.blogger.com/atom/ns#' term='apache'/><title type='text'>Apache mod_fcgid is the way to go for webpy</title><content type='html'>1. automatically spawns more processes based on traffic - lighttpd automatically the spawns the max number of processes even when no traffic - which is not useful if your max-procs is 100&lt;div&gt;&lt;br /&gt;&lt;/div&gt;&lt;div&gt;2. usually need to set PYTHONPATH or append to sys.path inside code.py manually &lt;/div&gt;&lt;div&gt;&lt;span class="Apple-style-span"  style="font-family:'courier new';"&gt;import os,sys&lt;/span&gt;&lt;/div&gt;&lt;div&gt;&lt;span class="Apple-style-span"  style="font-family:'courier new';"&gt;sys.path.append( os.path.dirname(os.path.abspath(__file__)))&lt;/span&gt;&lt;/div&gt;&lt;div&gt;&lt;br /&gt;&lt;/div&gt;&lt;div&gt;3. This is how you hook app.run for webpy&lt;/div&gt;&lt;div&gt;&lt;span class="Apple-style-span"  style="font-family:'courier new';"&gt; if __name__ == '__main__':&lt;/span&gt;&lt;/div&gt;&lt;div&gt;&lt;span class="Apple-style-span"  style="font-family:'courier new';"&gt;    if apache:&lt;/span&gt;&lt;/div&gt;&lt;div&gt;&lt;span class="Apple-style-span"  style="font-family:'courier new';"&gt;        web.wsgi.runwsgi = lambda func, addr=None: web.wsgi.runfcgi(func, addr)&lt;/span&gt;&lt;/div&gt;&lt;div&gt;&lt;span class="Apple-style-span"  style="font-family:'courier new';"&gt;        app.run(session_mw)&lt;/span&gt;&lt;/div&gt;&lt;div&gt;&lt;span class="Apple-style-span"  style="font-family:'courier new';"&gt;    else:app.run(session_mw)&lt;/span&gt;&lt;/div&gt;&lt;div&gt;&lt;br /&gt;&lt;/div&gt;&lt;div&gt;&lt;a href="http://webpy.org/cookbook/fastcgi-apache"&gt;The link to the related Webpy cookbook is here&lt;/a&gt;&lt;/div&gt;&lt;div&gt;&lt;br /&gt;&lt;/div&gt;&lt;div class="blogger-post-footer"&gt;&lt;img width='1' height='1' src='https://blogger.googleusercontent.com/tracker/33306823-1606376029434664300?l=pylab.blogspot.com' alt='' /&gt;&lt;/div&gt;</content><link 
rel='replies' type='application/atom+xml' href='http://pylab.blogspot.com/feeds/1606376029434664300/comments/default' title='Post Comments'/><link rel='replies' type='text/html' href='http://www.blogger.com/comment.g?blogID=33306823&amp;postID=1606376029434664300' title='0 Comments'/><link rel='edit' type='application/atom+xml' href='http://www.blogger.com/feeds/33306823/posts/default/1606376029434664300'/><link rel='self' type='application/atom+xml' href='http://www.blogger.com/feeds/33306823/posts/default/1606376029434664300'/><link rel='alternate' type='text/html' href='http://pylab.blogspot.com/2010/03/apache-modfcgid-is-way-to-go-for-webpy.html' title='Apache mod_fcgid is the way to go for webpy'/><author><name>NextGenSearch</name><email>noreply@blogger.com</email><gd:image rel='http://schemas.google.com/g/2005#thumbnail' width='16' height='16' src='http://img2.blogblog.com/img/b16-rounded.gif'/></author><thr:total>0</thr:total></entry><entry><id>tag:blogger.com,1999:blog-33306823.post-8411544231292968057</id><published>2009-02-25T14:31:00.000-08:00</published><updated>2009-02-25T14:32:21.620-08:00</updated><category scheme='http://www.blogger.com/atom/ns#' term='memcached'/><category scheme='http://www.blogger.com/atom/ns#' term='getframe'/><category scheme='http://www.blogger.com/atom/ns#' term='cheetah python gotchas'/><category scheme='http://www.blogger.com/atom/ns#' term='locals'/><title type='text'>Python Accessing Caller **locals() from callee method using sys._getframe</title><content type='html'>&lt;div&gt;This is my first attempt at memcaching html page using cheetah &lt;/div&gt;&lt;div&gt;since cheetah render needs locals() i use getCallerInfo() to get the locals() and send to memcached&lt;/div&gt;&lt;div&gt;let me know if it is possible to better do this&lt;/div&gt;  &lt;div&gt;&lt;b&gt;notice getCallerInfo&lt;br /&gt;&lt;/b&gt;&lt;/div&gt;&lt;div&gt;&lt;br 
/&gt;&lt;/div&gt;&lt;div&gt;&lt;b&gt;utils.py&lt;/b&gt;&lt;/div&gt;&lt;div&gt;@log_time_func&lt;/div&gt;&lt;div&gt;def renderpage(key, htmlfile, deleteafter=3600):&lt;/div&gt;&lt;div&gt;    from globaldb import mc&lt;/div&gt;&lt;div&gt;    try:page = mc.get(key)&lt;/div&gt;  &lt;div&gt;    except:&lt;/div&gt;&lt;div&gt;        page=None&lt;/div&gt;&lt;div&gt;        &lt;a href="http://clogger.info/" target="_blank"&gt;clogger.info&lt;/a&gt;('except error mc.get '+ key)&lt;/div&gt;&lt;div&gt;    if not page:&lt;/div&gt;&lt;div&gt;        &lt;a href="http://clogger.info/" target="_blank"&gt;clogger.info&lt;/a&gt;(key+ ' rendering cheetah page')&lt;/div&gt;  &lt;div&gt;        terms = getCallerInfo(1)&lt;/div&gt;&lt;div&gt;        #print terms&lt;/div&gt;&lt;div&gt;        page = str(web.render(htmlfile, asTemplate=True, terms=terms))&lt;/div&gt;&lt;div&gt;        try:mc.set(key, page, deleteafter)&lt;/div&gt;&lt;div&gt;        except:&lt;/div&gt;  &lt;div&gt;            &lt;a href="http://clogger.info/" target="_blank"&gt;clogger.info&lt;/a&gt;('except error mc.set '+ key)&lt;/div&gt;&lt;div&gt;    return page&lt;/div&gt;&lt;div&gt;&lt;br /&gt;&lt;/div&gt;&lt;div&gt;@log_time_func&lt;/div&gt;&lt;div&gt;@memcachethis&lt;/div&gt;&lt;div&gt;def mcrenderpage(key, htmlfile, deleteafter=3600):&lt;/div&gt;  &lt;div&gt;    terms = getCallerInfo(2)&lt;/div&gt;&lt;div&gt;    #print terms&lt;/div&gt;&lt;div&gt;    return str(web.render(htmlfile, asTemplate=True, terms=terms))&lt;/div&gt;&lt;div&gt;&lt;br /&gt;&lt;/div&gt;&lt;div&gt;def getCallerInfo(decorators=0):&lt;/div&gt;&lt;div&gt;    '''returns locals of caller using frame.optional pass number of decorators\nFrom Dig deep into python internals &lt;a href="http://www.devx.com/opensource/Article/31593/1954%27" target="_blank"&gt;http://www.devx.com/&lt;wbr&gt;opensource/Article/31593/1954'&lt;/a&gt;&lt;wbr&gt;''&lt;/div&gt;  &lt;div&gt;    f = sys._getframe(2+decorators)&lt;/div&gt;&lt;div&gt;    args = 
inspect.getargvalues(f)&lt;/div&gt;&lt;div&gt;    return args[3]&lt;/div&gt;&lt;div&gt;&lt;br /&gt;&lt;/div&gt;&lt;div&gt;&lt;b&gt;Usage&lt;/b&gt;&lt;/div&gt;&lt;div&gt;        key=facebookstuff.APP_NAME+'&lt;wbr&gt;newstart'+str(uid)&lt;/div&gt;  &lt;div&gt;        return utils.renderpage(key, 'pick.html')&lt;/div&gt;&lt;div class="blogger-post-footer"&gt;&lt;img width='1' height='1' src='https://blogger.googleusercontent.com/tracker/33306823-8411544231292968057?l=pylab.blogspot.com' alt='' /&gt;&lt;/div&gt;</content><link rel='replies' type='application/atom+xml' href='http://pylab.blogspot.com/feeds/8411544231292968057/comments/default' title='Post Comments'/><link rel='replies' type='text/html' href='http://www.blogger.com/comment.g?blogID=33306823&amp;postID=8411544231292968057' title='0 Comments'/><link rel='edit' type='application/atom+xml' href='http://www.blogger.com/feeds/33306823/posts/default/8411544231292968057'/><link rel='self' type='application/atom+xml' href='http://www.blogger.com/feeds/33306823/posts/default/8411544231292968057'/><link rel='alternate' type='text/html' href='http://pylab.blogspot.com/2009/02/python-accessing-caller-locals-from.html' title='Python Accessing Caller **locals() from callee method using sys._getframe'/><author><name>NextGenSearch</name><email>noreply@blogger.com</email><gd:image rel='http://schemas.google.com/g/2005#thumbnail' width='16' height='16' src='http://img2.blogblog.com/img/b16-rounded.gif'/></author><thr:total>0</thr:total></entry><entry><id>tag:blogger.com,1999:blog-33306823.post-5503436165576193076</id><published>2009-01-10T15:01:00.000-08:00</published><updated>2009-01-10T15:03:55.489-08:00</updated><category scheme='http://www.blogger.com/atom/ns#' term='example'/><category scheme='http://www.blogger.com/atom/ns#' term='couchdb'/><category scheme='http://www.blogger.com/atom/ns#' term='facebook'/><category scheme='http://www.blogger.com/atom/ns#' term='notifications'/><category 
scheme='http://www.blogger.com/atom/ns#' term='code'/><title type='text'>Facebook sytle Notifications in couchdb</title><content type='html'>Goal: To prepare a facebook style notification using couchdb&lt;br /&gt;schema&lt;br /&gt;class Notification(Document):&lt;br /&gt;    unread = BooleanField(default=True)&lt;br /&gt;    activity = TextField()&lt;br /&gt;    appname = TextField()&lt;br /&gt;    image = TextField()&lt;br /&gt;    type = TextField(default='notification')&lt;br /&gt;    fromuser = TextField()&lt;br /&gt;    touser = TextField()&lt;br /&gt;    link = TextField()&lt;br /&gt;    message = TextField()&lt;br /&gt;    created = DateTimeField(default=datetime.datetime.now())&lt;br /&gt;    time =TimeField(default=datetime.datetime.now())&lt;br /&gt;    date = DateField(default=datetime.date.today()&lt;br /&gt;&lt;br /&gt;def savenotifications(fromuser, tousers, link, message, activity, image, appname, *args, **kwargs):&lt;br /&gt;    for touser in tousers:&lt;br /&gt;        if fromuser == touser:continue&lt;br /&gt;        notidoc = Notification( fromuser = fromuser,touser=touser, link=link, message=message, activity=activity, image = image, appname = appname)&lt;br /&gt;        notidoc.store(fbcouchdb)&lt;br /&gt;&lt;span style="font-weight: bold;"&gt;    for touser in tousers:&lt;/span&gt;&lt;br /&gt; &lt;span style="font-weight: bold;"&gt;        r=fbcouchdb.view('_view/pop/unread',key=str(touser), count=1)&lt;/span&gt;&lt;br /&gt; &lt;span style="font-weight: bold;"&gt;        for i in r:i &lt;/span&gt;&lt;br /&gt;&lt;br /&gt;Bold: I even try to prepare the views when some one gets a new notifications by precalling the view unread ahead of time when a new notification for that user is inserted inorder to cause forced view regeneration&lt;br /&gt;&lt;br /&gt;&lt;br /&gt;     def getnotifications(self, all=False):&lt;br /&gt;         try:&lt;br /&gt;             from couchmodel import fbcouchdb&lt;br /&gt;             if not all:&lt;br /&gt;           
      return [ (x.id, x.value) for x in fbcouchdb.view('_view/pop/unre&lt;br /&gt; ad',key=str(self.uid), count=5) ]&lt;br /&gt;             else:&lt;br /&gt;                 return [ (x.id, x.value) for x in fbcouchdb.view('_view/pop/noti&lt;br /&gt; fications',key=str(self.uid)) ]&lt;br /&gt;         except:return[]&lt;br /&gt;&lt;br /&gt;&lt;br /&gt;&lt;span style="font-weight: bold;"&gt;Problem&lt;/span&gt;:but whenever i do  getnotification it is really slow.. And the page doesnt open forever. Anyways to make couchdb usable with this db? the situation is that notifications are constantly inserted into the db all the time as things happen to the user in real time. is this not a good use case for couchdb&lt;div class="blogger-post-footer"&gt;&lt;img width='1' height='1' src='https://blogger.googleusercontent.com/tracker/33306823-5503436165576193076?l=pylab.blogspot.com' alt='' /&gt;&lt;/div&gt;</content><link rel='related' href='http://teenwag.com' title='Facebook sytle Notifications in couchdb'/><link rel='replies' type='application/atom+xml' href='http://pylab.blogspot.com/feeds/5503436165576193076/comments/default' title='Post Comments'/><link rel='replies' type='text/html' href='http://www.blogger.com/comment.g?blogID=33306823&amp;postID=5503436165576193076' title='0 Comments'/><link rel='edit' type='application/atom+xml' href='http://www.blogger.com/feeds/33306823/posts/default/5503436165576193076'/><link rel='self' type='application/atom+xml' href='http://www.blogger.com/feeds/33306823/posts/default/5503436165576193076'/><link rel='alternate' type='text/html' href='http://pylab.blogspot.com/2009/01/facebook-sytle-notifications-in-couchdb.html' title='Facebook sytle Notifications in couchdb'/><author><name>NextGenSearch</name><email>noreply@blogger.com</email><gd:image rel='http://schemas.google.com/g/2005#thumbnail' width='16' height='16' 
src='http://img2.blogblog.com/img/b16-rounded.gif'/></author><thr:total>0</thr:total></entry><entry><id>tag:blogger.com,1999:blog-33306823.post-1727560539205092371</id><published>2009-01-07T00:23:00.001-08:00</published><updated>2009-01-07T00:34:46.907-08:00</updated><category scheme='http://www.blogger.com/atom/ns#' term='anonymous'/><category scheme='http://www.blogger.com/atom/ns#' term='inbox'/><category scheme='http://www.blogger.com/atom/ns#' term='email'/><category scheme='http://www.blogger.com/atom/ns#' term='couchdb'/><category scheme='http://www.blogger.com/atom/ns#' term='truthbox'/><title type='text'>Using couchdb to build a EMAIL messaging solution with threaded inline replies</title><content type='html'>&lt;span style="font-weight: bold;"&gt;Couchdb based Messaging system&lt;/span&gt;&lt;br /&gt;The goal is to build an anonymous inbox with inline threaded replies and count of number of items in the inbox and sent. It works well, the only part that is hard is that the views are pretty slow the first time they are constructed&lt;br /&gt;&lt;br /&gt;&lt;span style="font-weight: bold;"&gt;Schema&lt;br /&gt;&lt;/span&gt;&lt;pre class="textmate-source"&gt;&lt;pre class="sunburst"&gt;&lt;span class="meta meta_class meta_class_python"&gt;&lt;span class="storage storage_type storage_type_class storage_type_class_python"&gt;class&lt;/span&gt; &lt;span style="font-weight: bold;" class="entity entity_name entity_name_type entity_name_type_class entity_name_type_class_python"&gt;Anonmessage&lt;/span&gt;(&lt;span class="meta meta_class meta_class_inheritance meta_class_inheritance_python"&gt;&lt;span class="entity entity_other entity_other_inherited-class entity_other_inherited-class_python"&gt;Document&lt;/span&gt;&lt;/span&gt;)&lt;span style="font-weight: bold;"&gt;:&lt;/span&gt;&lt;/span&gt;&lt;br /&gt; subject &lt;span class="keyword keyword_operator keyword_operator_assignment keyword_operator_assignment_python"&gt;=&lt;/span&gt; &lt;span class="meta 
meta_function-call meta_function-call_python"&gt;TextField(&lt;span class="meta meta_function-call meta_function-call_arguments meta_function-call_arguments_python"&gt;&lt;/span&gt;)&lt;/span&gt;&lt;br /&gt; read &lt;span class="keyword keyword_operator keyword_operator_assignment keyword_operator_assignment_python"&gt;=&lt;/span&gt; &lt;span class="meta meta_function-call meta_function-call_python"&gt;BooleanField(&lt;span class="meta meta_function-call meta_function-call_arguments meta_function-call_arguments_python"&gt;&lt;/span&gt;)&lt;/span&gt;&lt;br /&gt; fromread &lt;span class="keyword keyword_operator keyword_operator_assignment keyword_operator_assignment_python"&gt;=&lt;/span&gt; &lt;span class="meta meta_function-call meta_function-call_python"&gt;BooleanField(&lt;span class="meta meta_function-call meta_function-call_arguments meta_function-call_arguments_python"&gt;&lt;/span&gt;)&lt;/span&gt;&lt;br /&gt; toread &lt;span class="keyword keyword_operator keyword_operator_assignment keyword_operator_assignment_python"&gt;=&lt;/span&gt; &lt;span class="meta meta_function-call meta_function-call_python"&gt;BooleanField(&lt;span class="meta meta_function-call meta_function-call_arguments meta_function-call_arguments_python"&gt;&lt;/span&gt;)&lt;/span&gt;&lt;br /&gt; msg &lt;span class="keyword keyword_operator keyword_operator_assignment keyword_operator_assignment_python"&gt;=&lt;/span&gt; &lt;span class="meta meta_function-call meta_function-call_python"&gt;TextField(&lt;span class="meta meta_function-call meta_function-call_arguments meta_function-call_arguments_python"&gt;&lt;/span&gt;)&lt;/span&gt;&lt;br /&gt; background &lt;span class="keyword keyword_operator keyword_operator_assignment keyword_operator_assignment_python"&gt;=&lt;/span&gt; &lt;span class="meta meta_function-call meta_function-call_python"&gt;TextField(&lt;span class="meta meta_function-call meta_function-call_arguments 
meta_function-call_arguments_python"&gt;&lt;/span&gt;)&lt;/span&gt;&lt;br /&gt; &lt;span class="support support_type support_type_python"&gt;type&lt;/span&gt; &lt;span class="keyword keyword_operator keyword_operator_assignment keyword_operator_assignment_python"&gt;=&lt;/span&gt; &lt;span class="meta meta_function-call meta_function-call_python"&gt;TextField(&lt;span class="meta meta_function-call meta_function-call_arguments meta_function-call_arguments_python"&gt;&lt;/span&gt;)&lt;/span&gt;&lt;br /&gt; reply &lt;span class="keyword keyword_operator keyword_operator_assignment keyword_operator_assignment_python"&gt;=&lt;/span&gt; &lt;span class="meta meta_function-call meta_function-call_python"&gt;TextField(&lt;span class="meta meta_function-call meta_function-call_arguments meta_function-call_arguments_python"&gt;&lt;/span&gt;)&lt;/span&gt;&lt;br /&gt; fromuid &lt;span class="keyword keyword_operator keyword_operator_assignment keyword_operator_assignment_python"&gt;=&lt;/span&gt; &lt;span class="meta meta_function-call meta_function-call_python"&gt;LongField(&lt;span class="meta meta_function-call meta_function-call_arguments meta_function-call_arguments_python"&gt;&lt;/span&gt;)&lt;/span&gt;&lt;br /&gt; replycount &lt;span class="keyword keyword_operator keyword_operator_assignment keyword_operator_assignment_python"&gt;=&lt;/span&gt; &lt;span class="meta meta_function-call meta_function-call_python"&gt;IntegerField(&lt;span class="meta meta_function-call meta_function-call_arguments meta_function-call_arguments_python"&gt;&lt;/span&gt;)&lt;/span&gt;&lt;br /&gt; touid &lt;span class="keyword keyword_operator keyword_operator_assignment keyword_operator_assignment_python"&gt;=&lt;/span&gt; &lt;span class="meta meta_function-call meta_function-call_python"&gt;LongField(&lt;span class="meta meta_function-call meta_function-call_arguments meta_function-call_arguments_python"&gt;&lt;/span&gt;)&lt;/span&gt;&lt;br /&gt; created &lt;span class="keyword 
keyword_operator keyword_operator_assignment keyword_operator_assignment_python"&gt;=&lt;/span&gt; &lt;span class="meta meta_function-call meta_function-call_python"&gt;DateTimeField(&lt;span class="meta meta_function-call meta_function-call_arguments meta_function-call_arguments_python"&gt;&lt;span class="variable variable_parameter variable_parameter_function variable_parameter_function_python"&gt;default&lt;/span&gt;&lt;span class="keyword keyword_operator keyword_operator_assignment keyword_operator_assignment_python"&gt;=&lt;/span&gt;&lt;span class="meta meta_function-call meta_function-call_python"&gt;datetime.datetime.now(&lt;span class="meta meta_function-call meta_function-call_arguments meta_function-call_arguments_python"&gt;&lt;/span&gt;)&lt;/span&gt;&lt;/span&gt;)&lt;/span&gt;&lt;br /&gt; time &lt;span class="keyword keyword_operator keyword_operator_assignment keyword_operator_assignment_python"&gt;=&lt;/span&gt;&lt;span class="meta meta_function-call meta_function-call_python"&gt;TimeField(&lt;span class="meta meta_function-call meta_function-call_arguments meta_function-call_arguments_python"&gt;&lt;span class="variable variable_parameter variable_parameter_function variable_parameter_function_python"&gt;default&lt;/span&gt;&lt;span class="keyword keyword_operator keyword_operator_assignment keyword_operator_assignment_python"&gt;=&lt;/span&gt;&lt;span class="meta meta_function-call meta_function-call_python"&gt;datetime.datetime.now(&lt;span class="meta meta_function-call meta_function-call_arguments meta_function-call_arguments_python"&gt;&lt;/span&gt;)&lt;/span&gt;&lt;/span&gt;)&lt;/span&gt;&lt;br /&gt; date &lt;span class="keyword keyword_operator keyword_operator_assignment keyword_operator_assignment_python"&gt;=&lt;/span&gt; &lt;span class="meta meta_function-call meta_function-call_python"&gt;DateField(&lt;span class="meta meta_function-call meta_function-call_arguments meta_function-call_arguments_python"&gt;&lt;span class="variable 
variable_parameter variable_parameter_function variable_parameter_function_python"&gt;default&lt;/span&gt;&lt;span class="keyword keyword_operator keyword_operator_assignment keyword_operator_assignment_python"&gt;=&lt;/span&gt;&lt;span class="meta meta_function-call meta_function-call_python"&gt;datetime.date.today(&lt;span class="meta meta_function-call meta_function-call_arguments meta_function-call_arguments_python"&gt;&lt;/span&gt;)&lt;/span&gt;&lt;/span&gt;)&lt;/span&gt;&lt;/pre&gt;&lt;/pre&gt;&lt;br /&gt;&lt;span style="font-weight: bold;"&gt;&lt;br /&gt;Permanent view for inbox:&lt;br /&gt;&lt;/span&gt;&lt;span style="color: rgb(0, 0, 153);"&gt;function(d){&lt;/span&gt; &lt;span style="color: rgb(0, 0, 153);"&gt;  if(d.type != 'reply'){&lt;/span&gt; &lt;span style="color: rgb(0, 0, 153);"&gt;    emit(d.touid, d);&lt;/span&gt; &lt;span style="color: rgb(0, 0, 153);"&gt;    if(d.touid != d.fromuid){&lt;/span&gt; &lt;span style="color: rgb(0, 0, 153);"&gt;      if (d.replycount&gt;0)&lt;/span&gt; &lt;span style="color: rgb(0, 0, 153);"&gt;      emit(d.fromuid, d);&lt;/span&gt; &lt;span style="color: rgb(0, 0, 153);"&gt;    }&lt;/span&gt; &lt;span style="color: rgb(0, 0, 153);"&gt;  }&lt;/span&gt; &lt;span style="color: rgb(0, 0, 153);"&gt;}&lt;/span&gt;&lt;span style="font-weight: bold;"&gt;&lt;br /&gt;&lt;br /&gt;Count of number of messages in USER inbox&lt;/span&gt;&lt;br /&gt;&lt;span style="font-weight: bold;"&gt;map&lt;/span&gt;:function(d){&lt;br /&gt;if(d.type != 'reply'){&lt;br /&gt;  emit(d.touid, 1);&lt;br /&gt;  if(d.touid != d.fromuid){&lt;br /&gt;    if (d.replycount&gt;0)&lt;br /&gt;    emit(d.fromuid, 1);&lt;br /&gt;  }&lt;br /&gt;}&lt;br /&gt;}&lt;br /&gt;&lt;br /&gt;&lt;span style="font-weight: bold;"&gt;reduce&lt;/span&gt;:function(keys, values) { return sum(values)}&lt;br /&gt;&lt;br /&gt;&lt;span style="font-weight: bold;"&gt;Count of Sent items&lt;br /&gt;map&lt;/span&gt;:function(doc) {  emit(doc.fromuid, 1) }&lt;br /&gt;&lt;span 
style="font-weight: bold;"&gt;reduce&lt;/span&gt;:function(keys, values) { return sum(values)}&lt;br /&gt;&lt;span style="font-weight: bold;"&gt;&lt;/span&gt;&lt;br /&gt;&lt;span style="font-weight: bold;"&gt;View to get replies&lt;/span&gt;&lt;br /&gt;&lt;span style="font-weight: bold;"&gt;map&lt;/span&gt;:function(d) { if (d.type=="reply" ) emit(d.reply,d); }&lt;br /&gt;&lt;br /&gt;&lt;span style="font-weight: bold;"&gt;Usage from python&lt;/span&gt;&lt;br /&gt;&lt;pre class="textmate-source"&gt;&lt;pre class="sunburst"&gt;inbox_messages &lt;span class="keyword keyword_operator keyword_operator_assignment keyword_operator_assignment_python"&gt;=&lt;/span&gt; &lt;span class="meta meta_structure meta_structure_list meta_structure_list_python"&gt;[ &lt;span class="meta meta_structure meta_structure_list meta_structure_list_item meta_structure_list_item_python"&gt;(x.id, x.value) &lt;span class="keyword keyword_control keyword_control_flow keyword_control_flow_python"&gt;for&lt;/span&gt; x &lt;span class="keyword keyword_operator keyword_operator_logical keyword_operator_logical_python"&gt;in&lt;/span&gt; &lt;span class="meta meta_function-call meta_function-call_python"&gt;anondb.view(&lt;span class="meta meta_function-call meta_function-call_arguments meta_function-call_arguments_python"&gt;&lt;span class="string string_quoted string_quoted_single string_quoted_single_single-line string_quoted_single_single-line_python"&gt;'_view/truthbox/inbox'&lt;/span&gt;,&lt;span class="variable variable_parameter variable_parameter_function variable_parameter_function_python"&gt;key&lt;/span&gt;&lt;span class="keyword keyword_operator keyword_operator_assignment keyword_operator_assignment_python"&gt;=&lt;/span&gt;z.uid&lt;/span&gt;)&lt;/span&gt;&lt;/span&gt; ]&lt;/span&gt;&lt;br /&gt;&lt;/pre&gt;&lt;/pre&gt;&lt;br /&gt;&lt;a onblur="try {parent.deselectBloggerImageGracefully();} catch(e) {}" href="http://apps.facebook.com/ukissme/anoninbox"&gt;&lt;img style="cursor: pointer; 
width: 372px; height: 400px;" src="http://1.bp.blogspot.com/_62vafIBqOVg/SWRozQ4BZXI/AAAAAAAAAGY/B82lErIN1yo/s400/screenshot+anoninbox.png" alt="" id="BLOGGER_PHOTO_ID_5288467092352361842" border="0" /&gt;&lt;/a&gt;&lt;a onblur="try {parent.deselectBloggerImageGracefully();} catch(e) {}" href="http://apps.facebook.com/ukissme/anoninbox"&gt;&lt;img style="cursor: pointer; width: 359px; height: 400px;" src="http://2.bp.blogspot.com/_62vafIBqOVg/SWRo5fDqsiI/AAAAAAAAAGg/k4M0NIPgKPM/s400/anoninbox+messages.png" alt="" id="BLOGGER_PHOTO_ID_5288467199238517282" border="0" /&gt;&lt;/a&gt;&lt;div class="blogger-post-footer"&gt;&lt;img width='1' height='1' src='https://blogger.googleusercontent.com/tracker/33306823-1727560539205092371?l=pylab.blogspot.com' alt='' /&gt;&lt;/div&gt;</content><link rel='related' href='http://apps.facebook.com/ukissme/anoninbox' title='Using couchdb to build a EMAIL messaging solution with threaded inline replies'/><link rel='replies' type='application/atom+xml' href='http://pylab.blogspot.com/feeds/1727560539205092371/comments/default' title='Post Comments'/><link rel='replies' type='text/html' href='http://www.blogger.com/comment.g?blogID=33306823&amp;postID=1727560539205092371' title='2 Comments'/><link rel='edit' type='application/atom+xml' href='http://www.blogger.com/feeds/33306823/posts/default/1727560539205092371'/><link rel='self' type='application/atom+xml' href='http://www.blogger.com/feeds/33306823/posts/default/1727560539205092371'/><link rel='alternate' type='text/html' href='http://pylab.blogspot.com/2009/01/using-couchdb-to-build-email-messaging.html' title='Using couchdb to build a EMAIL messaging solution with threaded inline replies'/><author><name>NextGenSearch</name><email>noreply@blogger.com</email><gd:image rel='http://schemas.google.com/g/2005#thumbnail' width='16' height='16' src='http://img2.blogblog.com/img/b16-rounded.gif'/></author><media:thumbnail xmlns:media='http://search.yahoo.com/mrss/' 
url='http://1.bp.blogspot.com/_62vafIBqOVg/SWRozQ4BZXI/AAAAAAAAAGY/B82lErIN1yo/s72-c/screenshot+anoninbox.png' height='72' width='72'/><thr:total>2</thr:total></entry><entry><id>tag:blogger.com,1999:blog-33306823.post-5288828844447520547</id><published>2009-01-03T12:45:00.000-08:00</published><updated>2009-01-03T13:14:26.658-08:00</updated><category scheme='http://www.blogger.com/atom/ns#' term='couchdb'/><title type='text'>Ten Reasons why Couchdb is better than Mysql</title><content type='html'>&lt;ol&gt;&lt;li&gt;No schema - schema less db, which means you can develop at the speed of your thought, you dont need to do a db update everytime you add a column&lt;br /&gt;&lt;/li&gt;&lt;li&gt;everything is over http, simple http, get post, put, delete requests which means works with  varnish/squid out of the box (i prefer varnish because it is clean and simple)&lt;/li&gt;&lt;li&gt;Attachments - you can store file attachments for eg., &lt;a href="http://apps.facebook.com/bdayecards"&gt;your greeting card to grandma can carry images, music, flash&lt;/a&gt;&lt;/li&gt;&lt;li&gt;Map Reduce - no more sql queries, use amazingly scalable map-reduce based views. 
Views once saved are lighting fast&lt;/li&gt;&lt;li&gt;Sexy Futon javascript interface, comes with a cool js interface for displaying and editing data&lt;/li&gt;&lt;li&gt;javascript server using mozilla spidermonkey to construct views - means no need php in flash&lt;br /&gt;&lt;/li&gt;&lt;li&gt;zero config replication - work from home with no internet&lt;/li&gt;&lt;li&gt;&lt;a href="http://youfindr.com/python_couchdb_library"&gt;python couchdb library&lt;/a&gt;&lt;/li&gt;&lt;li&gt;bulk updates, deletes - you can store 100000 docs in one post request&lt;/li&gt;&lt;li&gt;Each couchdb document is just a simple JSON compatible doc, no cruft just simple&lt;/li&gt;&lt;li&gt;(Bonus) Uses Erlang, which means it is scalable for multicore multiprocessor machines&lt;/li&gt;&lt;li&gt;(Extra Bonus) Low memory requirement Takes 150MB compared to 8Gig taken by Mysql for a similar db setup&lt;/li&gt;&lt;li&gt;similar to zodb, but much more cleaner and intuitive&lt;/li&gt;&lt;li&gt;&lt;a href="http://youfindr.com/couchdb_mailing_list"&gt;Extremely friendly community and developers  - Damien, Jchris, paul davis, noah slater, chris anderson, Jan&lt;/a&gt;&lt;br /&gt;&lt;/li&gt;&lt;/ol&gt;&lt;div class="blogger-post-footer"&gt;&lt;img width='1' height='1' src='https://blogger.googleusercontent.com/tracker/33306823-5288828844447520547?l=pylab.blogspot.com' alt='' /&gt;&lt;/div&gt;</content><link rel='related' href='http://youfindr.com/couchdb' title='Ten Reasons why Couchdb is better than Mysql'/><link rel='replies' type='application/atom+xml' href='http://pylab.blogspot.com/feeds/5288828844447520547/comments/default' title='Post Comments'/><link rel='replies' type='text/html' href='http://www.blogger.com/comment.g?blogID=33306823&amp;postID=5288828844447520547' title='4 Comments'/><link rel='edit' type='application/atom+xml' href='http://www.blogger.com/feeds/33306823/posts/default/5288828844447520547'/><link rel='self' type='application/atom+xml' 
href='http://www.blogger.com/feeds/33306823/posts/default/5288828844447520547'/><link rel='alternate' type='text/html' href='http://pylab.blogspot.com/2009/01/ten-reasons-why-couchdb-is-better-than.html' title='Ten Reasons why Couchdb is better than Mysql'/><author><name>NextGenSearch</name><email>noreply@blogger.com</email><gd:image rel='http://schemas.google.com/g/2005#thumbnail' width='16' height='16' src='http://img2.blogblog.com/img/b16-rounded.gif'/></author><thr:total>4</thr:total></entry><entry><id>tag:blogger.com,1999:blog-33306823.post-1036854513320404654</id><published>2008-10-10T15:05:00.000-07:00</published><updated>2008-10-10T15:06:39.126-07:00</updated><category scheme='http://www.blogger.com/atom/ns#' term='python api'/><title type='text'>Python Api for last.fm</title><content type='html'>&lt;a href="http://code.google.com/p/pylast/"&gt;Pylast&lt;/a&gt; Python Api for last.fm&lt;div class="blogger-post-footer"&gt;&lt;img width='1' height='1' src='https://blogger.googleusercontent.com/tracker/33306823-1036854513320404654?l=pylab.blogspot.com' alt='' /&gt;&lt;/div&gt;</content><link rel='replies' type='application/atom+xml' href='http://pylab.blogspot.com/feeds/1036854513320404654/comments/default' title='Post Comments'/><link rel='replies' type='text/html' href='http://www.blogger.com/comment.g?blogID=33306823&amp;postID=1036854513320404654' title='0 Comments'/><link rel='edit' type='application/atom+xml' href='http://www.blogger.com/feeds/33306823/posts/default/1036854513320404654'/><link rel='self' type='application/atom+xml' href='http://www.blogger.com/feeds/33306823/posts/default/1036854513320404654'/><link rel='alternate' type='text/html' href='http://pylab.blogspot.com/2008/10/python-api-for-lastfm.html' title='Python Api for last.fm'/><author><name>NextGenSearch</name><email>noreply@blogger.com</email><gd:image rel='http://schemas.google.com/g/2005#thumbnail' width='16' height='16' 
src='http://img2.blogblog.com/img/b16-rounded.gif'/></author><thr:total>0</thr:total></entry><entry><id>tag:blogger.com,1999:blog-33306823.post-7998076637463337271</id><published>2008-10-02T14:08:00.000-07:00</published><updated>2008-10-02T14:11:45.915-07:00</updated><category scheme='http://www.blogger.com/atom/ns#' term='torrents'/><category scheme='http://www.blogger.com/atom/ns#' term='piratebay'/><category scheme='http://www.blogger.com/atom/ns#' term='google search'/><category scheme='http://www.blogger.com/atom/ns#' term='gpirate'/><title type='text'>The best torrent search engine</title><content type='html'>I accidentally discovered this new torrent search engine... It is very clean, googley and really fast.. When I say blazingly fast... Incredibly high speeds... .02 seconds&lt;br /&gt;&lt;br /&gt;&lt;a onblur="try {parent.deselectBloggerImageGracefully();} catch(e) {}" href="http://gpirate.com/"&gt;&lt;img style="cursor: pointer;" src="http://2.bp.blogspot.com/_62vafIBqOVg/SOU44SchJBI/AAAAAAAAAEE/P7JYBRBjSl4/s400/logosearch.png" alt="" id="BLOGGER_PHOTO_ID_5252667080072832018" border="0" /&gt;&lt;/a&gt;&lt;br /&gt;&lt;br /&gt;Do give it a spin&lt;br /&gt;   &lt;br /&gt;Recommendations : &lt;a href="http://gpirate.com/"&gt;&lt;span style="font-weight: bold;"&gt;Highly Recommended&lt;/span&gt;&lt;/a&gt;&lt;div class="blogger-post-footer"&gt;&lt;img width='1' height='1' src='https://blogger.googleusercontent.com/tracker/33306823-7998076637463337271?l=pylab.blogspot.com' alt='' /&gt;&lt;/div&gt;</content><link rel='related' href='http://gpirate.com/?src=pylabs' title='The best torrent search engine'/><link rel='replies' type='application/atom+xml' href='http://pylab.blogspot.com/feeds/7998076637463337271/comments/default' title='Post Comments'/><link rel='replies' type='text/html' href='http://www.blogger.com/comment.g?blogID=33306823&amp;postID=7998076637463337271' title='0 Comments'/><link rel='edit' type='application/atom+xml' 
href='http://www.blogger.com/feeds/33306823/posts/default/7998076637463337271'/><link rel='self' type='application/atom+xml' href='http://www.blogger.com/feeds/33306823/posts/default/7998076637463337271'/><link rel='alternate' type='text/html' href='http://pylab.blogspot.com/2008/10/best-torrent-search-engine.html' title='The best torrent search engine'/><author><name>NextGenSearch</name><email>noreply@blogger.com</email><gd:image rel='http://schemas.google.com/g/2005#thumbnail' width='16' height='16' src='http://img2.blogblog.com/img/b16-rounded.gif'/></author><media:thumbnail xmlns:media='http://search.yahoo.com/mrss/' url='http://2.bp.blogspot.com/_62vafIBqOVg/SOU44SchJBI/AAAAAAAAAEE/P7JYBRBjSl4/s72-c/logosearch.png' height='72' width='72'/><thr:total>0</thr:total></entry><entry><id>tag:blogger.com,1999:blog-33306823.post-8224647282712407761</id><published>2007-09-16T11:53:00.001-07:00</published><updated>2007-09-16T11:53:56.339-07:00</updated><category scheme='http://www.blogger.com/atom/ns#' term='python datetime'/><title type='text'>python datetime guide</title><content type='html'>3. 
Dates and Times&lt;br /&gt;Introduction&lt;br /&gt;&lt;br /&gt;#----------------------------- &lt;br /&gt;#introduction&lt;br /&gt;# There are three common ways of manipulating dates in Python&lt;br /&gt;# mxDateTime - a popular third-party module (not discussed here) &lt;br /&gt;# time - a fairly low-level standard library module &lt;br /&gt;# datetime - a new library module for Python 2.3 and used for most of these samples &lt;br /&gt;# (I will use full names to show which module they are in, but you can also use&lt;br /&gt;# from datetime import datetime, timedelta and so on for convenience) &lt;br /&gt;&lt;br /&gt;import time&lt;br /&gt;import datetime&lt;br /&gt;&lt;br /&gt;print "Today is day", time.localtime()[7], "of the current year" &lt;br /&gt;# Today is day 218 of the current year&lt;br /&gt;&lt;br /&gt;today = datetime.date.today()&lt;br /&gt;print "Today is day", today.timetuple()[7], "of ", today.year&lt;br /&gt;# Today is day 218 of 2003&lt;br /&gt;&lt;br /&gt;print "Today is day", today.strftime("%j"), "of the current year" &lt;br /&gt;# Today is day 218 of the current year&lt;br /&gt; &lt;br /&gt;&lt;br /&gt;Finding Today's Date&lt;br /&gt;&lt;br /&gt;#----------------------------- &lt;br /&gt;# Finding todays date&lt;br /&gt;&lt;br /&gt;today = datetime.date.today()&lt;br /&gt;print "The date is", today &lt;br /&gt;#=&gt; The date is 2003-08-06&lt;br /&gt;&lt;br /&gt;# the function strftime() (string-format time) produces nice formatting&lt;br /&gt;# All codes are detailed at http://www.python.org/doc/current/lib/module-time.html&lt;br /&gt;print t.strftime("four-digit year: %Y, two-digit year: %y, month: %m, day: %d") &lt;br /&gt;#=&gt; four-digit year: 2003, two-digit year: 03, month: 08, day: 06&lt;br /&gt;&lt;br /&gt;Converting DMYHMS to Epoch Seconds&lt;br /&gt;&lt;br /&gt;#----------------------------- &lt;br /&gt;# Converting DMYHMS to Epoch Seconds&lt;br /&gt;# To work with Epoch Seconds, you need to use the time module&lt;br /&gt;&lt;br 
/&gt;# For the local timezone&lt;br /&gt;t = datetime.datetime.now()&lt;br /&gt;print "Epoch Seconds:", time.mktime(t.timetuple())&lt;br /&gt;#=&gt; Epoch Seconds: 1060199000.0&lt;br /&gt;&lt;br /&gt;# For UTC&lt;br /&gt;t = datetime.datetime.utcnow()&lt;br /&gt;print "Epoch Seconds:", time.mktime(t.timetuple())&lt;br /&gt;#=&gt; Epoch Seconds: 1060195503.0&lt;br /&gt;&lt;br /&gt;Converting Epoch Seconds to DMYHMS&lt;br /&gt;&lt;br /&gt;#----------------------------- &lt;br /&gt;# Converting Epoch Seconds to DMYHMS&lt;br /&gt;&lt;br /&gt;now = datetime.datetime.fromtimestamp(EpochSeconds)&lt;br /&gt;#or use datetime.datetime.utcfromtimestamp()&lt;br /&gt;print now&lt;br /&gt;#=&gt; datetime.datetime(2003, 8, 6, 20, 43, 20)&lt;br /&gt;print now.ctime()&lt;br /&gt;#=&gt; Wed Aug  6 20:43:20 2003&lt;br /&gt;&lt;br /&gt;# or with the time module&lt;br /&gt;oldtimetuple = time.localtime(EpochSeconds)&lt;br /&gt;# oldtimetuple contains (year, month, day, hour, minute, second, weekday, yearday, daylightSavingAdjustment) &lt;br /&gt;print oldtimetuple &lt;br /&gt;#=&gt; (2003, 8, 6, 20, 43, 20, 2, 218, 1)&lt;br /&gt;&lt;br /&gt;Adding to or Subtracting from a Date&lt;br /&gt;&lt;br /&gt;#----------------------------- &lt;br /&gt;# Adding to or Subtracting from a Date&lt;br /&gt;# Use the rather nice datetime.timedelta objects&lt;br /&gt;&lt;br /&gt;now = datetime.date(2003, 8, 6)&lt;br /&gt;difference1 = datetime.timedelta(days=1)&lt;br /&gt;difference2 = datetime.timedelta(weeks=-2)&lt;br /&gt;&lt;br /&gt;print "One day in the future is:", now + difference1&lt;br /&gt;#=&gt; One day in the future is: 2003-08-07&lt;br /&gt;&lt;br /&gt;print "Two weeks in the past is:", now + difference2&lt;br /&gt;#=&gt; Two weeks in the past is: 2003-07-23&lt;br /&gt;&lt;br /&gt;print datetime.date(2003, 8, 6) - datetime.date(2000, 8, 6)&lt;br /&gt;#=&gt; 1095 days, 0:00:00&lt;br /&gt;&lt;br /&gt;#----------------------------- &lt;br /&gt;birthtime = datetime.datetime(1973, 01, 18, 3, 45, 
50)   # 1973-01-18 03:45:50&lt;br /&gt;&lt;br /&gt;interval = datetime.timedelta(seconds=5, minutes=17, hours=2, days=55) &lt;br /&gt;then = birthtime + interval&lt;br /&gt;&lt;br /&gt;print "Then is", then.ctime()&lt;br /&gt;#=&gt; Then is Wed Mar 14 06:02:55 1973&lt;br /&gt;&lt;br /&gt;print "Then is", then.strftime("%A %B %d %I:%M:%S %p %Y")&lt;br /&gt;#=&gt; Then is Wednesday March 14 06:02:55 AM 1973&lt;br /&gt;&lt;br /&gt;#-----------------------------&lt;br /&gt;when = datetime.datetime(1973, 1, 18) + datetime.timedelta(days=55) &lt;br /&gt;print "Nat was 55 days old on:", when.strftime("%m/%d/%Y").lstrip("0")&lt;br /&gt;#=&gt; Nat was 55 days old on: 3/14/1973&lt;br /&gt;&lt;br /&gt;Difference of Two Dates&lt;br /&gt;&lt;br /&gt;#----------------------------- &lt;br /&gt;# Dates produce timedeltas when subtracted.&lt;br /&gt;&lt;br /&gt;diff = date2 - date1&lt;br /&gt;diff = datetime.date(year1, month1, day1) - datetime.date(year2, month2, day2)&lt;br /&gt;#----------------------------- &lt;br /&gt;&lt;br /&gt;bree = datetime.datetime(1981, 6, 16, 4, 35, 25)&lt;br /&gt;nat  = datetime.datetime(1973, 1, 18, 3, 45, 50)&lt;br /&gt;&lt;br /&gt;difference = bree - nat&lt;br /&gt;print "There were", difference, "minutes between Nat and Bree"&lt;br /&gt;#=&gt; There were 3071 days, 0:49:35 between Nat and Bree&lt;br /&gt;&lt;br /&gt;weeks, days = divmod(difference.days, 7)&lt;br /&gt;&lt;br /&gt;minutes, seconds = divmod(difference.seconds, 60)&lt;br /&gt;hours, minutes = divmod(minutes, 60)&lt;br /&gt;&lt;br /&gt;print "%d weeks, %d days, %d:%d:%d" % (weeks, days, hours, minutes, seconds)&lt;br /&gt;#=&gt; 438 weeks, 5 days, 0:49:35&lt;br /&gt;&lt;br /&gt;#----------------------------- &lt;br /&gt;print "There were", difference.days, "days between Bree and Nat." 
&lt;br /&gt;#=&gt; There were 3071 days between bree and nat&lt;br /&gt;&lt;br /&gt;Day in a Week/Month/Year or Week Number&lt;br /&gt;&lt;br /&gt;#----------------------------- &lt;br /&gt;# Day in a Week/Month/Year or Week Number&lt;br /&gt;&lt;br /&gt;when = datetime.date(1981, 6, 16)&lt;br /&gt;&lt;br /&gt;print "16/6/1981 was:"&lt;br /&gt;print when.strftime("Day %w of the week (a %A). Day %d of the month (%B).")&lt;br /&gt;print when.strftime("Day %j of the year (%Y), in week %W of the year.")&lt;br /&gt;&lt;br /&gt;#=&gt; 16/6/1981 was:&lt;br /&gt;#=&gt; Day 2 of the week (a Tuesday). Day 16 of the month (June).&lt;br /&gt;#=&gt; Day 167 of the year (1981), in week 24 of the year.&lt;br /&gt;&lt;br /&gt;Parsing Dates and Times from Strings&lt;br /&gt;&lt;br /&gt;#----------------------------- &lt;br /&gt;# Parsing Dates and Times from Strings&lt;br /&gt;&lt;br /&gt;time.strptime("Tue Jun 16 20:18:03 1981")&lt;br /&gt;# (1981, 6, 16, 20, 18, 3, 1, 167, -1)&lt;br /&gt;&lt;br /&gt;time.strptime("16/6/1981", "%d/%m/%Y")&lt;br /&gt;# (1981, 6, 16, 0, 0, 0, 1, 167, -1)&lt;br /&gt;# strptime() can use any of the formatting codes from time.strftime()&lt;br /&gt;&lt;br /&gt;# The easiest way to convert this to a datetime seems to be; &lt;br /&gt;now = datetime.datetime(*time.strptime("16/6/1981", "%d/%m/%Y")[0:5])&lt;br /&gt;# the '*' operator unpacks the tuple, producing the argument list.&lt;br /&gt;&lt;br /&gt;Printing a Date&lt;br /&gt;&lt;br /&gt;#----------------------------- &lt;br /&gt;# Printing a Date&lt;br /&gt;# Use datetime.strftime() - see helpfiles in distro or at python.org&lt;br /&gt;&lt;br /&gt;print datetime.datetime.now().strftime("The date is %A (%a) %d/%m/%Y") &lt;br /&gt;#=&gt; The date is Friday (Fri) 08/08/2003&lt;br /&gt;&lt;br /&gt;High-Resolution Timers&lt;br /&gt;&lt;br /&gt;#----------------------------- &lt;br /&gt;# High Resolution Timers&lt;br /&gt;&lt;br /&gt;t1 = time.clock()&lt;br /&gt;# Do Stuff Here&lt;br /&gt;t2 = 
time.clock()&lt;br /&gt;print t2 - t1&lt;br /&gt;&lt;br /&gt;# 2.27236813618&lt;br /&gt;# Accuracy will depend on platform and OS,&lt;br /&gt;# but time.clock() uses the most accurate timer it can&lt;br /&gt;&lt;br /&gt;time.clock(); time.clock()&lt;br /&gt;# 174485.51365466841&lt;br /&gt;# 174485.55702610247&lt;br /&gt;&lt;br /&gt;#----------------------------- &lt;br /&gt;# Also useful;&lt;br /&gt;import timeit&lt;br /&gt;code = '[x for x in range(10) if x % 2 == 0]'&lt;br /&gt;eval(code)&lt;br /&gt;# [0, 2, 4, 6, 8]&lt;br /&gt;&lt;br /&gt;t = timeit.Timer(code)&lt;br /&gt;print "10,000 repeats of that code takes:", t.timeit(10000), "seconds" &lt;br /&gt;print "1,000,000 repeats of that code takes:", t.timeit(), "seconds"&lt;br /&gt;&lt;br /&gt;# 10,000 repeats of that code takes: 0.128238644856 seconds&lt;br /&gt;# 1,000,000 repeats of that code takes:  12.5396490336 seconds&lt;br /&gt;&lt;br /&gt;#----------------------------- &lt;br /&gt;import timeit&lt;br /&gt;code = 'import random; l = random.sample(xrange(10000000), 1000); l.sort()' &lt;br /&gt;t = timeit.Timer(code)&lt;br /&gt;&lt;br /&gt;print "Create a list of a thousand random numbers. Sort the list. Repeated a thousand times." 
&lt;br /&gt;print "Average Time:", t.timeit(1000) / 1000&lt;br /&gt;# Time taken: 5.24391507859&lt;br /&gt;&lt;br /&gt;Short Sleeps&lt;br /&gt;&lt;br /&gt;#----------------------------- &lt;br /&gt;# Short Sleeps&lt;br /&gt;&lt;br /&gt;seconds = 3.1&lt;br /&gt;time.sleep(seconds)&lt;br /&gt;print "boo"&lt;br /&gt;&lt;br /&gt;Program: hopdelta&lt;br /&gt;&lt;br /&gt;#----------------------------- &lt;br /&gt;# Program HopDelta&lt;br /&gt;# Save a raw email to disk and run "python hopdelta.py FILE"&lt;br /&gt;# and it will process the headers and show the time taken&lt;br /&gt;# for each server hop (nb: if server times are wrong, negative dates&lt;br /&gt;# might appear in the output).&lt;br /&gt;&lt;br /&gt;import datetime, email, email.Utils&lt;br /&gt;import os, sys, time&lt;br /&gt;&lt;br /&gt;def extract_date(hop):&lt;br /&gt;    # According to RFC822, the date will be prefixed with&lt;br /&gt;    # a semi-colon, and is the last part of a received&lt;br /&gt;    # header.&lt;br /&gt;    date_string = hop[hop.find(';')+2:]&lt;br /&gt;    date_string = date_string.strip()&lt;br /&gt;    time_tuple = email.Utils.parsedate(date_string)&lt;br /&gt;&lt;br /&gt;    # convert time_tuple to datetime&lt;br /&gt;    EpochSeconds = time.mktime(time_tuple) &lt;br /&gt;    dt = datetime.datetime.fromtimestamp(EpochSeconds)&lt;br /&gt;    return dt&lt;br /&gt;&lt;br /&gt;def process(filename):&lt;br /&gt;    # Main email file processing&lt;br /&gt;    # read the headers and process them&lt;br /&gt;    f = file(filename, 'rb')&lt;br /&gt;    msg = email.message_from_file(f)&lt;br /&gt;&lt;br /&gt;    hops = msg.get_all('received')&lt;br /&gt;    &lt;br /&gt;    # in reverse order, get the server(s) and date/time involved&lt;br /&gt;    hops.reverse()&lt;br /&gt;    results = []&lt;br /&gt;    for hop in hops:&lt;br /&gt;        hop = hop.lower()&lt;br /&gt;        &lt;br /&gt;        if hop.startswith('by'):  # 'Received: by' line&lt;br /&gt;            sender = "start"&lt;br 
/&gt;            receiver = hop[3:hop.find(' ',3)]&lt;br /&gt;            date = extract_date(hop)&lt;br /&gt;&lt;br /&gt;        else:  # 'Received: from' line&lt;br /&gt;            sender = hop[5:hop.find(' ',5)]&lt;br /&gt;            by = hop.find('by ')+3&lt;br /&gt;            receiver = hop[by:hop.find(' ', by)]&lt;br /&gt;            date = extract_date(hop)&lt;br /&gt;&lt;br /&gt;        results.append((sender, receiver, date))&lt;br /&gt;    output(results)&lt;br /&gt;&lt;br /&gt;def output(results):&lt;br /&gt;    print "Sender, Recipient, Time, Delta"&lt;br /&gt;    print&lt;br /&gt;    previous_dt = delta = 0&lt;br /&gt;    for (sender, receiver, date) in results:&lt;br /&gt;        if previous_dt:&lt;br /&gt;            delta = date - previous_dt&lt;br /&gt;        &lt;br /&gt;        print "%s, %s, %s, %s" % (sender,&lt;br /&gt;                               receiver,&lt;br /&gt;                               date.strftime("%Y/%d/%m %H:%M:%S"),&lt;br /&gt;                               delta)&lt;br /&gt;        print&lt;br /&gt;        previous_dt = date   &lt;br /&gt;            &lt;br /&gt;def main():&lt;br /&gt;    # Perform some basic argument checking&lt;br /&gt;    if len(sys.argv) != 2:&lt;br /&gt;        print "Usage: mailhop.py FILENAME"&lt;br /&gt;&lt;br /&gt;    else:&lt;br /&gt;        filename = sys.argv[1]&lt;br /&gt;        if os.path.isfile(filename):&lt;br /&gt;            process(filename)&lt;br /&gt;        else:&lt;br /&gt;            print filename, "doesn't seem to be a valid file."&lt;br /&gt;&lt;br /&gt;if __name__ == '__main__':&lt;br /&gt;    main()&lt;div class="blogger-post-footer"&gt;&lt;img width='1' height='1' src='https://blogger.googleusercontent.com/tracker/33306823-8224647282712407761?l=pylab.blogspot.com' alt='' /&gt;&lt;/div&gt;</content><link rel='replies' type='application/atom+xml' href='http://pylab.blogspot.com/feeds/8224647282712407761/comments/default' title='Post Comments'/><link rel='replies' 
type='text/html' href='http://www.blogger.com/comment.g?blogID=33306823&amp;postID=8224647282712407761' title='1 Comments'/><link rel='edit' type='application/atom+xml' href='http://www.blogger.com/feeds/33306823/posts/default/8224647282712407761'/><link rel='self' type='application/atom+xml' href='http://www.blogger.com/feeds/33306823/posts/default/8224647282712407761'/><link rel='alternate' type='text/html' href='http://pylab.blogspot.com/2007/09/python-datetime-guide.html' title='python datetime guide'/><author><name>NextGenSearch</name><email>noreply@blogger.com</email><gd:image rel='http://schemas.google.com/g/2005#thumbnail' width='16' height='16' src='http://img2.blogblog.com/img/b16-rounded.gif'/></author><thr:total>1</thr:total></entry><entry><id>tag:blogger.com,1999:blog-33306823.post-2621450820029172487</id><published>2007-07-15T21:38:00.000-07:00</published><updated>2007-07-15T21:53:24.179-07:00</updated><title type='text'>svn deps</title><content type='html'>Cheetah&lt;br /&gt;Psycopg2&lt;br /&gt;PIL&lt;br /&gt;mx egenix&lt;br /&gt;&lt;span style="font-style: italic;"&gt;sudo python setup.py build --skip install&lt;/span&gt;&lt;br /&gt;aspell.so&lt;div class="blogger-post-footer"&gt;&lt;img width='1' height='1' src='https://blogger.googleusercontent.com/tracker/33306823-2621450820029172487?l=pylab.blogspot.com' alt='' /&gt;&lt;/div&gt;</content><link rel='replies' type='application/atom+xml' href='http://pylab.blogspot.com/feeds/2621450820029172487/comments/default' title='Post Comments'/><link rel='replies' type='text/html' href='http://www.blogger.com/comment.g?blogID=33306823&amp;postID=2621450820029172487' title='0 Comments'/><link rel='edit' type='application/atom+xml' href='http://www.blogger.com/feeds/33306823/posts/default/2621450820029172487'/><link rel='self' type='application/atom+xml' href='http://www.blogger.com/feeds/33306823/posts/default/2621450820029172487'/><link rel='alternate' type='text/html' 
href='http://pylab.blogspot.com/2007/07/svn-deps.html' title='svn deps'/><author><name>NextGenSearch</name><email>noreply@blogger.com</email><gd:image rel='http://schemas.google.com/g/2005#thumbnail' width='16' height='16' src='http://img2.blogblog.com/img/b16-rounded.gif'/></author><thr:total>0</thr:total></entry><entry><id>tag:blogger.com,1999:blog-33306823.post-6916319448493561028</id><published>2007-07-01T12:03:00.000-07:00</published><updated>2007-07-01T12:04:26.354-07:00</updated><category scheme='http://www.blogger.com/atom/ns#' term='yum fedora'/><title type='text'>How to update to Fedora 7</title><content type='html'>&lt;div class="post" id="post-116"&gt;&lt;div class="entry"&gt;&lt;div class="snap_preview"&gt;&lt;p&gt;This is how I updated my FC6 to F7:&lt;/p&gt; &lt;p&gt;&lt;code&gt;wget ftp://download.fedora.redhat.com/pub/fedora/linux/releases/7/Fedora/i386/os/Fedora/fedora-release-7-3.noarch.rpm&lt;br /&gt;wget ftp://download.fedora.redhat.com/pub/fedora/linux/releases/7/Fedora/i386/os/Fedora/fedora-release-notes-7.0.0-1.noarch.rpm&lt;br /&gt;rpm -U fedora-release-7-3.noarch.rpm fedora-release-notes-7.0.0-1.noarch.rpm&lt;br /&gt;yum update&lt;/code&gt;&lt;/p&gt; &lt;p&gt;In theory that’s the only thing you need to do, in practice it’s not that easy. I had some dependency issues with pidgin and liferea so I just removed them:&lt;/p&gt; &lt;p&gt;&lt;code&gt;rpm -e pidgin liferea&lt;/code&gt;&lt;/p&gt; &lt;p&gt;Then yum complained about libexif being “not signed”. I tried to edit /etc/yum.conf and setting gpgcheck=0, that didn’t work, yum was still checking it. So I had to manually set gpgcheck=0 in all the repos on /etc/yum.repos.d/. 
Then it worked.&lt;/p&gt; &lt;p&gt;That’s it!&lt;/p&gt; &lt;/div&gt;    &lt;/div&gt;&lt;/div&gt;&lt;div class="blogger-post-footer"&gt;&lt;img width='1' height='1' src='https://blogger.googleusercontent.com/tracker/33306823-6916319448493561028?l=pylab.blogspot.com' alt='' /&gt;&lt;/div&gt;</content><link rel='replies' type='application/atom+xml' href='http://pylab.blogspot.com/feeds/6916319448493561028/comments/default' title='Post Comments'/><link rel='replies' type='text/html' href='http://www.blogger.com/comment.g?blogID=33306823&amp;postID=6916319448493561028' title='0 Comments'/><link rel='edit' type='application/atom+xml' href='http://www.blogger.com/feeds/33306823/posts/default/6916319448493561028'/><link rel='self' type='application/atom+xml' href='http://www.blogger.com/feeds/33306823/posts/default/6916319448493561028'/><link rel='alternate' type='text/html' href='http://pylab.blogspot.com/2007/07/how-to-update-to-fedora-7.html' title='How to update to Fedora 7'/><author><name>NextGenSearch</name><email>noreply@blogger.com</email><gd:image rel='http://schemas.google.com/g/2005#thumbnail' width='16' height='16' src='http://img2.blogblog.com/img/b16-rounded.gif'/></author><thr:total>0</thr:total></entry><entry><id>tag:blogger.com,1999:blog-33306823.post-8846933292356109042</id><published>2007-06-27T18:37:00.000-07:00</published><updated>2007-06-27T18:38:21.325-07:00</updated><category scheme='http://www.blogger.com/atom/ns#' term='postgres'/><title type='text'>Adding a fast random row to a table in postgresql</title><content type='html'>ALTER TABLE posts ADD myrand DOUBLE PRECISION;UPDATE posts SET myrand = RANDOM(); CREATE INDEX myrand_posts ON posts(myrand,id); ANALYZE VERBOSE posts;&lt;br /&gt;ALTER TABLE posts ALTER myrand&lt;br /&gt;SET  DEFAULT RANDOM();&lt;br /&gt;ALTER TABLE posts ALTER myrand&lt;br /&gt;SET  NOT NULL;&lt;br /&gt;--SELECT * FROM posts WHERE myrand &gt;= (SELECT RANDOM() OFFSET 0) ORDER BY myrand ASC LIMIT 1;&lt;div 
class="blogger-post-footer"&gt;&lt;img width='1' height='1' src='https://blogger.googleusercontent.com/tracker/33306823-8846933292356109042?l=pylab.blogspot.com' alt='' /&gt;&lt;/div&gt;</content><link rel='replies' type='application/atom+xml' href='http://pylab.blogspot.com/feeds/8846933292356109042/comments/default' title='Post Comments'/><link rel='replies' type='text/html' href='http://www.blogger.com/comment.g?blogID=33306823&amp;postID=8846933292356109042' title='0 Comments'/><link rel='edit' type='application/atom+xml' href='http://www.blogger.com/feeds/33306823/posts/default/8846933292356109042'/><link rel='self' type='application/atom+xml' href='http://www.blogger.com/feeds/33306823/posts/default/8846933292356109042'/><link rel='alternate' type='text/html' href='http://pylab.blogspot.com/2007/06/adding-fast-random-row-to-table-in.html' title='Adding a fast random row to a table in postgresql'/><author><name>NextGenSearch</name><email>noreply@blogger.com</email><gd:image rel='http://schemas.google.com/g/2005#thumbnail' width='16' height='16' src='http://img2.blogblog.com/img/b16-rounded.gif'/></author><thr:total>0</thr:total></entry><entry><id>tag:blogger.com,1999:blog-33306823.post-1836719565178833987</id><published>2007-06-26T11:45:00.000-07:00</published><updated>2007-06-26T11:46:07.791-07:00</updated><category scheme='http://www.blogger.com/atom/ns#' term='postgres'/><title type='text'>Boolean column in a table in postgres</title><content type='html'>CREATE INDEX polls_active_idx ON polls(done) WHERE done='t';&lt;div class="blogger-post-footer"&gt;&lt;img width='1' height='1' src='https://blogger.googleusercontent.com/tracker/33306823-1836719565178833987?l=pylab.blogspot.com' alt='' /&gt;&lt;/div&gt;</content><link rel='replies' type='application/atom+xml' href='http://pylab.blogspot.com/feeds/1836719565178833987/comments/default' title='Post Comments'/><link rel='replies' type='text/html' 
href='http://www.blogger.com/comment.g?blogID=33306823&amp;postID=1836719565178833987' title='0 Comments'/><link rel='edit' type='application/atom+xml' href='http://www.blogger.com/feeds/33306823/posts/default/1836719565178833987'/><link rel='self' type='application/atom+xml' href='http://www.blogger.com/feeds/33306823/posts/default/1836719565178833987'/><link rel='alternate' type='text/html' href='http://pylab.blogspot.com/2007/06/boolean-column-in-table-in-postgres.html' title='Boolean column in a table in postgres'/><author><name>NextGenSearch</name><email>noreply@blogger.com</email><gd:image rel='http://schemas.google.com/g/2005#thumbnail' width='16' height='16' src='http://img2.blogblog.com/img/b16-rounded.gif'/></author><thr:total>0</thr:total></entry><entry><id>tag:blogger.com,1999:blog-33306823.post-5251190137425924014</id><published>2007-06-18T18:11:00.000-07:00</published><updated>2007-06-18T18:13:24.248-07:00</updated><category scheme='http://www.blogger.com/atom/ns#' term='css'/><title type='text'>how to get auto align css centering ala mybloglog and facebook</title><content type='html'>body {    width:99%;max-width:1024px;  m&lt;strong&gt;argin:auto;text-align:center;&lt;/strong&gt;  }&lt;br /&gt;#wrap {       &lt;strong&gt;margin: auto;&lt;/strong&gt;    width:85%; &lt;strong&gt;text-align:left;&lt;/strong&gt;    }&lt;br /&gt;#main {    float:left;  width:74%; margin-top:1%;   }&lt;br /&gt;#sidebar {    float:right;max-width: 25%;width:25%;min-width: 5%;  padding:3px; }&lt;br /&gt;#footer {    background:#cc9;    clear:both;    }&lt;div class="blogger-post-footer"&gt;&lt;img width='1' height='1' src='https://blogger.googleusercontent.com/tracker/33306823-5251190137425924014?l=pylab.blogspot.com' alt='' /&gt;&lt;/div&gt;</content><link rel='replies' type='application/atom+xml' href='http://pylab.blogspot.com/feeds/5251190137425924014/comments/default' title='Post Comments'/><link rel='replies' type='text/html' 
href='http://www.blogger.com/comment.g?blogID=33306823&amp;postID=5251190137425924014' title='0 Comments'/><link rel='edit' type='application/atom+xml' href='http://www.blogger.com/feeds/33306823/posts/default/5251190137425924014'/><link rel='self' type='application/atom+xml' href='http://www.blogger.com/feeds/33306823/posts/default/5251190137425924014'/><link rel='alternate' type='text/html' href='http://pylab.blogspot.com/2007/06/how-to-get-auto-align-css-centering-ala.html' title='how to get auto align css centering ala mybloglog and facebook'/><author><name>NextGenSearch</name><email>noreply@blogger.com</email><gd:image rel='http://schemas.google.com/g/2005#thumbnail' width='16' height='16' src='http://img2.blogblog.com/img/b16-rounded.gif'/></author><thr:total>0</thr:total></entry><entry><id>tag:blogger.com,1999:blog-33306823.post-2996922914301633520</id><published>2007-05-10T19:32:00.001-07:00</published><updated>2007-05-10T19:32:26.525-07:00</updated><category scheme='http://www.blogger.com/atom/ns#' term='tech'/><title type='text'>tech tools</title><content type='html'>&lt;a href="http://www.python.org/"&gt;Python&lt;/a&gt;&lt;br /&gt;&lt;br /&gt;&lt;a href="http://www.postgresql.org"&gt;postgres&lt;/a&gt;&lt;br /&gt;&lt;br /&gt;&lt;a href="http://aws.amazon.com/s3"&gt;Amazon S3&lt;/a&gt;&lt;br /&gt;&lt;br /&gt;&lt;a href="http://webpy.org/"&gt;webpy.org&lt;/a&gt;&lt;br /&gt;&lt;br /&gt;&lt;a href="http://www.crummy.com/software/BeautifulSoup/"&gt;Beautiful soup&lt;/a&gt;&lt;br /&gt;&lt;br /&gt;&lt;a href="http://www.lighttpd.net/"&gt;lighttpd &lt;/a&gt;or &lt;a href="http://www.nginx.net/"&gt;nginx&lt;/a&gt;&lt;br /&gt;&lt;br /&gt;&lt;a href="http://vim.org/"&gt;vim&lt;/a&gt; and &lt;a href="http://www.gnu.org/software/emacs/"&gt;emacs&lt;/a&gt;&lt;div class="blogger-post-footer"&gt;&lt;img width='1' height='1' src='https://blogger.googleusercontent.com/tracker/33306823-2996922914301633520?l=pylab.blogspot.com' alt='' /&gt;&lt;/div&gt;</content><link 
rel='replies' type='application/atom+xml' href='http://pylab.blogspot.com/feeds/2996922914301633520/comments/default' title='Post Comments'/><link rel='replies' type='text/html' href='http://www.blogger.com/comment.g?blogID=33306823&amp;postID=2996922914301633520' title='0 Comments'/><link rel='edit' type='application/atom+xml' href='http://www.blogger.com/feeds/33306823/posts/default/2996922914301633520'/><link rel='self' type='application/atom+xml' href='http://www.blogger.com/feeds/33306823/posts/default/2996922914301633520'/><link rel='alternate' type='text/html' href='http://pylab.blogspot.com/2007/05/tech-tools.html' title='tech tools'/><author><name>NextGenSearch</name><email>noreply@blogger.com</email><gd:image rel='http://schemas.google.com/g/2005#thumbnail' width='16' height='16' src='http://img2.blogblog.com/img/b16-rounded.gif'/></author><thr:total>0</thr:total></entry><entry><id>tag:blogger.com,1999:blog-33306823.post-5433404415018866837</id><published>2007-04-09T22:51:00.000-07:00</published><updated>2007-04-09T22:52:53.656-07:00</updated><category scheme='http://www.blogger.com/atom/ns#' term='postgres'/><title type='text'>Determining who's blocking who in Postgres</title><content type='html'>Determining who's blocking who in Postgres&lt;br /&gt;If you have databases like I do with lots of concurrent queries, you can sometime run into situations where you issue a query and it just hangs there blocked. Or, more likely somebody or something issues a query and then comes calling when it doesn't seem to be doing anything.&lt;br /&gt;&lt;br /&gt;Of course, you have the handy pg_stat_activity and pg_locks views at your disposal, but when it comes to determining exactly which queries are blocking which others and on what table, querying those alone is a tedious way to get the answer. What you really need is a query that sums it all up, in one neat and tidy bundle. 
Well, my friends here is such a query:&lt;br /&gt;&lt;br /&gt;SELECT&lt;br /&gt;bl.procpid as blocked_pid,&lt;br /&gt;bl.usename as user,&lt;br /&gt;bl.current_query as blocked_query,&lt;br /&gt;bl.query_start,&lt;br /&gt;relname as blocked_on ,&lt;br /&gt;lq.procpid as blocking_pid,&lt;br /&gt;lq.usename as user,&lt;br /&gt;lq.current_query as blocking_query,&lt;br /&gt;lq.query_start,&lt;br /&gt;pgl2.mode as lock_type&lt;br /&gt;FROM pg_stat_activity bl, pg_locks pgl1,&lt;br /&gt;pg_stat_activity lq, pg_locks pgl2, pg_class&lt;br /&gt;WHERE bl.procpid = pgl1.pid&lt;br /&gt;AND not pgl1.granted&lt;br /&gt;AND pg_class.oid = pgl1.relation&lt;br /&gt;AND pgl2.relation = pgl1.relation&lt;br /&gt;AND pgl2.granted&lt;br /&gt;AND lq.procpid = pgl2.pid;&lt;br /&gt;&lt;br /&gt;In extended mode (\x) psql returns something along these lines for this query:&lt;br /&gt;&lt;br /&gt;blocked_pid    | 21418&lt;br /&gt;user           | sueuser&lt;br /&gt;blocked_query  | insert values ('foo', 'bar', 'baz') &lt;br /&gt;  into extremely_large_table;&lt;br /&gt;query_start    | 2007-02-13 15:14:06.77606-08&lt;br /&gt;blocked_on     | extremely_large_table&lt;br /&gt;blocking_pid   | 21417&lt;br /&gt;user           | joeuser&lt;br /&gt;blocking_query | delete from extremely_large_table;&lt;br /&gt;query_start    | 2007-02-13 14:45:34.637675-08&lt;br /&gt;lock_type      | AccessExclusiveLock&lt;div class="blogger-post-footer"&gt;&lt;img width='1' height='1' src='https://blogger.googleusercontent.com/tracker/33306823-5433404415018866837?l=pylab.blogspot.com' alt='' /&gt;&lt;/div&gt;</content><link rel='replies' type='application/atom+xml' href='http://pylab.blogspot.com/feeds/5433404415018866837/comments/default' title='Post Comments'/><link rel='replies' type='text/html' href='http://www.blogger.com/comment.g?blogID=33306823&amp;postID=5433404415018866837' title='0 Comments'/><link rel='edit' type='application/atom+xml' 
href='http://www.blogger.com/feeds/33306823/posts/default/5433404415018866837'/><link rel='self' type='application/atom+xml' href='http://www.blogger.com/feeds/33306823/posts/default/5433404415018866837'/><link rel='alternate' type='text/html' href='http://pylab.blogspot.com/2007/04/determining-whos-blocking-who-in.html' title='Determining who&apos;s blocking who in Postgres'/><author><name>NextGenSearch</name><email>noreply@blogger.com</email><gd:image rel='http://schemas.google.com/g/2005#thumbnail' width='16' height='16' src='http://img2.blogblog.com/img/b16-rounded.gif'/></author><thr:total>0</thr:total></entry><entry><id>tag:blogger.com,1999:blog-33306823.post-6439375692907913233</id><published>2007-04-09T22:50:00.001-07:00</published><updated>2007-04-09T22:50:26.600-07:00</updated><title type='text'>python spell checker</title><content type='html'>How to Write a Spelling Corrector&lt;br /&gt;In the past week, two friends (Dean and Bill) independently told me they were amazed at how Google does spelling correction so well and quickly. Type in a search like [speling] and Google comes back in 0.1 seconds or so with Did you mean: spelling. What surprised me is that I thought Dean and Bill, being highly accomplished engineers and mathematicians, would have good intuitions about statistical language processing problems such as spelling correction. But they didn't, and come to think of it, there's no reason they should. I figured they and many others could benefit from an explanation, and so on the plane back from my trip I wrote a toy spelling corrector, which I now share.&lt;br /&gt;&lt;br /&gt;Let's get right to it. I figured that in less than a plane flight, and in less than a page of code, I could write a spelling corrector that achieves 80 or 90% accuracy at a rate of at least 10 words per second. 
And in fact, here, in 20 lines of Python 2.5 code, is the complete spelling corrector:&lt;br /&gt;&lt;br /&gt;import re, string, collections&lt;br /&gt;&lt;br /&gt;def words(text): return re.findall('[a-z]+', text.lower()) &lt;br /&gt;&lt;br /&gt;def train(features):&lt;br /&gt;    model = collections.defaultdict(lambda: 1)&lt;br /&gt;    for f in features:&lt;br /&gt;        model[f] += 1&lt;br /&gt;    return model&lt;br /&gt;&lt;br /&gt;NWORDS = train(words(file('Documents/holmes.txt').read()))&lt;br /&gt;&lt;br /&gt;def edits1(word):&lt;br /&gt;    n = len(word)&lt;br /&gt;    return set([word[0:i]+word[i+1:] for i in range(n)] + ## deletion&lt;br /&gt;               [word[0:i]+word[i+1]+word[i]+word[i+2:] for i in range(n-1)] + ## transposition&lt;br /&gt;               [word[0:i]+c+word[i+1:] for i in range(n) for c in string.lowercase] + ## alteration&lt;br /&gt;               [word[0:i]+c+word[i:] for i in range(n+1) for c in string.lowercase]) ## insertion&lt;br /&gt;&lt;br /&gt;def known_edits2(word):&lt;br /&gt;    return set(e2 for e1 in edits1(word) for e2 in edits1(e1) if e2 in NWORDS)&lt;br /&gt;&lt;br /&gt;def known(words): return set(w for w in words if w in NWORDS)&lt;br /&gt;&lt;br /&gt;def correct(word):&lt;br /&gt;    return max(known([word]) or known(edits1(word)) or known_edits2(word) or [word],&lt;br /&gt;               key=lambda w: NWORDS[w])&lt;br /&gt;&lt;br /&gt;This defines the function correct, which takes a word as input and returns a likely correction of that word. 
For example:&lt;br /&gt;&lt;br /&gt;&gt;&gt;&gt; correct('speling')&lt;br /&gt;'spelling'&lt;br /&gt;&gt;&gt;&gt; correct('korrecter')&lt;br /&gt;'corrector'&lt;div class="blogger-post-footer"&gt;&lt;img width='1' height='1' src='https://blogger.googleusercontent.com/tracker/33306823-6439375692907913233?l=pylab.blogspot.com' alt='' /&gt;&lt;/div&gt;</content><link rel='replies' type='application/atom+xml' href='http://pylab.blogspot.com/feeds/6439375692907913233/comments/default' title='Post Comments'/><link rel='replies' type='text/html' href='http://www.blogger.com/comment.g?blogID=33306823&amp;postID=6439375692907913233' title='1 Comments'/><link rel='edit' type='application/atom+xml' href='http://www.blogger.com/feeds/33306823/posts/default/6439375692907913233'/><link rel='self' type='application/atom+xml' href='http://www.blogger.com/feeds/33306823/posts/default/6439375692907913233'/><link rel='alternate' type='text/html' href='http://pylab.blogspot.com/2007/04/python-spell-checker.html' title='python spell checker'/><author><name>NextGenSearch</name><email>noreply@blogger.com</email><gd:image rel='http://schemas.google.com/g/2005#thumbnail' width='16' height='16' src='http://img2.blogblog.com/img/b16-rounded.gif'/></author><thr:total>1</thr:total></entry><entry><id>tag:blogger.com,1999:blog-33306823.post-8961919950041043185</id><published>2007-03-20T19:04:00.001-07:00</published><updated>2007-03-20T19:04:32.137-07:00</updated><title type='text'>mencoder convert to flash</title><content type='html'>mencoder pasta_maken_311205.avi -o video.flv -of lavf -ovc lavc -oac lavc -lavcopts vcodec=flv:vbitrate=500:autoaspect:mbd=2:mv0:trell:v4mv:cbp:last_pred=3:predia=2:dia=2:precmp=2:cmp=2:subcmp=2:preme=2:turbo:acodec=mp3:abitrate=56 -vf scale=320:240 -srate 22050 -af lavcresample=22050&lt;div class="blogger-post-footer"&gt;&lt;img width='1' height='1' src='https://blogger.googleusercontent.com/tracker/33306823-8961919950041043185?l=pylab.blogspot.com' alt='' 
/&gt;&lt;/div&gt;</content><link rel='replies' type='application/atom+xml' href='http://pylab.blogspot.com/feeds/8961919950041043185/comments/default' title='Post Comments'/><link rel='replies' type='text/html' href='http://www.blogger.com/comment.g?blogID=33306823&amp;postID=8961919950041043185' title='0 Comments'/><link rel='edit' type='application/atom+xml' href='http://www.blogger.com/feeds/33306823/posts/default/8961919950041043185'/><link rel='self' type='application/atom+xml' href='http://www.blogger.com/feeds/33306823/posts/default/8961919950041043185'/><link rel='alternate' type='text/html' href='http://pylab.blogspot.com/2007/03/mencoder-convert-to-flash.html' title='mencoder convert to flash'/><author><name>NextGenSearch</name><email>noreply@blogger.com</email><gd:image rel='http://schemas.google.com/g/2005#thumbnail' width='16' height='16' src='http://img2.blogblog.com/img/b16-rounded.gif'/></author><thr:total>0</thr:total></entry><entry><id>tag:blogger.com,1999:blog-33306823.post-8936879184762572307</id><published>2007-03-19T16:49:00.000-07:00</published><updated>2007-03-19T16:50:20.592-07:00</updated><category scheme='http://www.blogger.com/atom/ns#' term='python'/><category scheme='http://www.blogger.com/atom/ns#' term='threading'/><title type='text'>thread control clean</title><content type='html'>&lt;p&gt;An example of an idiom for controling threads&lt;br /&gt;&lt;br /&gt;Doug  Fort&lt;br /&gt;http://www.dougfort.net&lt;br /&gt;"""&lt;br /&gt;&lt;br /&gt;import threading&lt;br /&gt;&lt;br /&gt;class  TestThread(threading.Thread):&lt;br /&gt;"""&lt;br /&gt;A sample thread class&lt;br /&gt;"""&lt;br /&gt;&lt;br /&gt;def  __init__(self):&lt;br /&gt;"""&lt;br /&gt;Constructor, setting initial  variables&lt;br /&gt;"""&lt;br /&gt;self._stopevent = threading.Event()&lt;br /&gt;self._sleepperiod =  1.0&lt;br /&gt;&lt;br /&gt;threading.Thread.__init__(self, name="TestThread")&lt;br /&gt;&lt;br /&gt;def  run(self):&lt;br /&gt;"""&lt;br /&gt;overload of 
threading.thread.run()&lt;br /&gt;main control  loop&lt;br /&gt;"""&lt;br /&gt;print "%s starts" % (self.getName(),)&lt;br /&gt;&lt;br /&gt;count = 0&lt;br /&gt;while  not self._stopevent.isSet():&lt;br /&gt;count += 1&lt;br /&gt;print "loop %d" %  (count,)&lt;br /&gt;self._stopevent.wait(self._sleepperiod)&lt;br /&gt;&lt;br /&gt;print "%s ends" %  (self.getName(),)&lt;br /&gt;&lt;br /&gt;def join(self,timeout=None):&lt;br /&gt;"""&lt;br /&gt;Stop the  thread&lt;br /&gt;"""&lt;br /&gt;self._stopevent.set()&lt;br /&gt;threading.Thread.join(self,  timeout)&lt;br /&gt;&lt;br /&gt;if __name__ == "__main__":&lt;br /&gt;testthread =  TestThread()&lt;br /&gt;testthread.start()&lt;br /&gt;&lt;br /&gt;import  time&lt;br /&gt;time.sleep(10.0)&lt;br /&gt;&lt;br /&gt;testthread.join()&lt;/p&gt;&lt;div class="blogger-post-footer"&gt;&lt;img width='1' height='1' src='https://blogger.googleusercontent.com/tracker/33306823-8936879184762572307?l=pylab.blogspot.com' alt='' /&gt;&lt;/div&gt;</content><link rel='replies' type='application/atom+xml' href='http://pylab.blogspot.com/feeds/8936879184762572307/comments/default' title='Post Comments'/><link rel='replies' type='text/html' href='http://www.blogger.com/comment.g?blogID=33306823&amp;postID=8936879184762572307' title='0 Comments'/><link rel='edit' type='application/atom+xml' href='http://www.blogger.com/feeds/33306823/posts/default/8936879184762572307'/><link rel='self' type='application/atom+xml' href='http://www.blogger.com/feeds/33306823/posts/default/8936879184762572307'/><link rel='alternate' type='text/html' href='http://pylab.blogspot.com/2007/03/thread-control-clean.html' title='thread control clean'/><author><name>NextGenSearch</name><email>noreply@blogger.com</email><gd:image rel='http://schemas.google.com/g/2005#thumbnail' width='16' height='16' 
src='http://img2.blogblog.com/img/b16-rounded.gif'/></author><thr:total>0</thr:total></entry><entry><id>tag:blogger.com,1999:blog-33306823.post-80482515622944565</id><published>2007-03-10T10:57:00.000-08:00</published><updated>2007-03-10T10:58:23.104-08:00</updated><category scheme='http://www.blogger.com/atom/ns#' term='openpoker'/><category scheme='http://www.blogger.com/atom/ns#' term='erlang'/><title type='text'>OpenPoker how it works</title><content type='html'>&lt;div class="primary"&gt;       &lt;h1&gt;Writing low-pain massively scalable multiplayer servers&lt;/h1&gt;              &lt;p&gt;Another oldie. First penned for &lt;a href="http://devmaster.net/"&gt;DevMaster&lt;/a&gt; over a year ago. &lt;a href="http://devmaster.net/articles/mmo-scalable-server"&gt;Original article&lt;/a&gt; comes with a &lt;a href="http://www.devmaster.net/forums/showthread.php?t=4185"&gt;heated forum discussion&lt;/a&gt;! &lt;/p&gt;  &lt;p&gt; I have since exited the poker business and removed related articles and links from this site. The Erlang source code is still available, though. I’ll dissect it in detail in a series of articles over the next few weeks. &lt;/p&gt;&lt;p&gt; Let me know if you have any trouble viewing this article. Use resizing page controls above to expand the page if you need to.&lt;br /&gt; &lt;/p&gt;&lt;h1&gt;Introduction&lt;/h1&gt;  &lt;p&gt;This article describes an alternative approach to building massively scalable online multiplayer systems using my &lt;a href="http://wagerlabs.com/openpoker.tgz"&gt;OpenPoker&lt;/a&gt; project as an example. OpenPoker is a massively multiplayer poker server with fault-tolerance, load balancing and unlimited scalability built-in. 
The source code to OpenPoker is available from my site under the GPL and comes in under 10,000 lines of code of which about 1/3 are dedicated to testing.&lt;/p&gt;  &lt;p&gt;I prototyped extensively before coming up with the final version of OpenPoker and tried Delphi, Python, C#, C/C++ and Scheme. I also wrote a full-blown poker engine in Common Lisp. While I did spend over 9 months on research and prototyping, the final rewrite only took about 6 weeks of coding. I attribute most of the time and cost savings to &lt;a href="http://www.erlang.org/"&gt;choosing Erlang as my platform&lt;/a&gt;.&lt;br /&gt;&lt;br /&gt;By comparison, it took a team of 4-5 people about 9 months to build the &lt;a href="http://sf.net/projects/openpoker"&gt;old OpenPoker&lt;/a&gt;. The original team also built a Windows poker client but even if I cut development time in half to account for this 1.5 month, it is far from 18 months that I will end up with. In today's world of bloated game development budgets such savings are nothing to sneeze at!&lt;/p&gt;  &lt;h1&gt;What is Erlang&lt;/h1&gt;  &lt;p&gt;I suggest you browse through the &lt;a href="http://www.erlang.org/faq/t1.html#AEN9"&gt;Erlang FAQ&lt;/a&gt; before continuing but I'll give you a quick summary here…&lt;/p&gt;  &lt;p&gt;Erlang is a functional, dynamically typed language with built-in support for concurrency. It was specifically designed by Ericsson for telecommunications applications such as controlling a switch or converting protocols, and thus is particularly suitable for building distributed, soft real-time concurrent systems.&lt;/p&gt;  &lt;p&gt;Applications written in Erlang are often composed of hundreds or thousands of lightweight processes communicating via message passing. 
Context switching between Erlang processes is typically one or two orders of magnitude cheaper than switching between threads in a C program.&lt;/p&gt;  &lt;p&gt;It's easy to write distributed applications in Erlang because its distribution mechanisms are transparent: programs need not be aware that they are distributed.&lt;/p&gt;  &lt;p&gt;The Erlang runtime environment is a virtual machine (VM), much like the Java virtual machine. This means that code compiled on one architecture runs anywhere. The runtime system also allows code in a running system to be updated without interrupting the program and the byte code can be compiled to native code when you need that extra boost.&lt;/p&gt;  &lt;p&gt;Please head to the &lt;a href="http://www.erlang.org/"&gt;Erlang site&lt;/a&gt; and check out the excellent resources in the &lt;a href="http://www.erlang.org/starting.html"&gt; Getting started&lt;/a&gt;, &lt;a href="http://www.erlang.org/doc.html"&gt;Documentation&lt;/a&gt; and &lt;a href="http://www.erlang.org/examples.html"&gt;Examples&lt;/a&gt; sections.&lt;/p&gt;  &lt;h1&gt;Why Erlang&lt;/h1&gt;  &lt;p&gt;The concurrency model built into Erlang makes it particularly suitable for writing online multiplayer servers.&lt;/p&gt;  &lt;p&gt;A massively scalable multiplayer backend in Erlang is built as a "cluster" with different "nodes" dedicated to different tasks. An Erlang node is an instance of the Erlang VM and you can run multiple Erlang nodes/VMs on your desktop, laptop or server. One node per CPU is recommended.&lt;/p&gt;  &lt;p&gt;Erlang nodes track all other nodes connected to them. All you need to do to add a new node to the cluster is point it to an existing node. As soon as the two nodes establish contact all other nodes in the cluster become aware of the new node.&lt;/p&gt;  &lt;p&gt;Erlang processes send messages to other processes using a process id which encodes information about the node where the process is running. 
Processes need not be aware of where other processes are located to communicate with them. A bunch of Erlang nodes linked together can be viewed as a grid or supercomputing facility.&lt;/p&gt;  &lt;p&gt;Players, NPCs and other entities in massively multiplayer games are best modelled as concurrently running processes but concurrency is notoriously hard to work with. Erlang makes concurrency easy.&lt;/p&gt;  &lt;p&gt;Erlang's bit syntax makes it trivial to work with binary data and bests the structure packing/unpacking facilities of Perl and Python. This makes Erlang particularly suitable for handling binary network protocols.&lt;/p&gt;  &lt;h1&gt;The OpenPoker architecture&lt;/h1&gt;  &lt;p&gt;Everything in OpenPoker is a process. Players, bots, games, pots, etc. are all processes. For every poker client  connected to OpenPoker there's a player "proxy" handling network messages. Depending on whether the player is logged  in, some messages are ignored while others are passed to the process handling card game logic.&lt;/p&gt;  &lt;p&gt;The card game process is an uber-state machine composed of state machine modules for every stage of the game. This lets me treat card game logic as a Lego constructor and add new card games by putting together the state machine building blocks. Take a look at the start function in cardgame.erl if you want to learn more about my approach.&lt;/p&gt;  &lt;p&gt;The card game state machine lets different messages through depending on the game stage. It also uses a separate game process to handle the machinery common to all games such as keeping track of players, pots, limits and so on. When simulating 27,000 poker games on my laptop I found that I had about 136,000 players and close to 800,000 processes in total.&lt;/p&gt;  &lt;p&gt;That said, I would like to focus on how Erlang makes it simple to implement scalability, fault tolerance and load balancing using OpenPoker as an example. My approach is not particular to poker or card games. 
The same approach can be used to quickly put together massively scalable multiplayer backends, do it cheaply and with a minimum amount of pain.&lt;/p&gt;  &lt;h1&gt;Scalability&lt;/h1&gt;  &lt;p&gt;I implement scalability and load-balancing by means of a multi-tier architecture. The first tier is represented by gateway nodes. Game server nodes form tier two and Mnesia "master" nodes can be thought of as the third tier.&lt;/p&gt;  &lt;p&gt;Mnesia is the Erlang real-time distributed database. The &lt;a href="http://www.erlang.org/faq/x1351.html"&gt;Mnesia FAQ&lt;/a&gt; has a good explanation but Mnesia is basically a fast, replicating, in-memory database. There are no objects in Erlang but Mnesia can be thought of as object-oriented as it can store any Erlang data.&lt;/p&gt;  &lt;p&gt;There are two types of Mnesia nodes: those that write to disk and those that do not. Regardless of this, all Mnesia nodes keep their data in memory. Mnesia master nodes in OpenPoker are nodes that write to disk. Gateways and game servers pick up their database from Mnesia masters upon startup and are memory-only nodes.&lt;/p&gt;  &lt;p&gt;There's a handy set of command-line arguments that you can give to the Erlang VM and interpreter when starting up to tell Mnesia where the master database is located. After the new local Mnesia node establishes contact with the master Mnesia node, the new node becomes part of the master node’s cluster.&lt;/p&gt;  &lt;p&gt;Assuming that the master nodes are located on hosts apple and orange, adding a new gateway, game server, etc. 
node to your OpenPoker cluster is as simple as&lt;/p&gt;  &lt;pre class="code"&gt;erl -mnesia extra_db_nodes \['db@apple','db@orange'\] -s mnesia start&lt;/pre&gt;&lt;br /&gt;&lt;p&gt;where&lt;/p&gt; &lt;pre class="code"&gt;-s mnesia start&lt;/pre&gt;&lt;br /&gt;&lt;p&gt;is equivalent to starting Mnesia from the erlang shell like this&lt;/p&gt; &lt;pre class="code"&gt;erl -mnesia extra_db_nodes \['db@apple','db@orange'\]&lt;br /&gt;Erlang (BEAM) emulator version 5.4.8 [source] [hipe] [threads:0]&lt;br /&gt;&lt;br /&gt;Eshell V5.4.8  (abort with ^G)&lt;br /&gt;1&gt; mnesia:start().&lt;br /&gt;ok&lt;/pre&gt;&lt;br /&gt;&lt;p&gt;OpenPoker keeps configuration information in Mnesia tables and this information is automatically downloaded by  new nodes as soon as Mnesia starts. Zero configuration required!&lt;/p&gt; &lt;h1&gt;Fault tolerance&lt;/h1&gt; OpenPoker lets me grow as high as I want by adding cheap Linux boxes to my server farm. Put together a couple of  racks of 1U servers and you can easily handle 500,000 or even 1,000,000 players online. This would work just as well for a &lt;a href="http://en.wikipedia.org/wiki/MMORPG"&gt;MMORPG&lt;/a&gt; as for poker.&lt;p&gt;I can dedicate some boxes to run gateway nodes and some to be database masters that write database transactions to disk. I can dedicate the rest of my boxes to run my game servers. I can limit game servers to accept a maximum of, say, 5000 simultaneous players so that no more than 5000 players are affected when my game server box crashes.&lt;/p&gt; &lt;p&gt;It's important to note that no information is lost when a game server crashes since all the Mnesia database transactions are replicated in real-time to all other nodes running Mnesia, game server nodes included.&lt;/p&gt; &lt;p&gt;In case of errors some assistance from the game client is required for the player to smoothly reconnect to the OpenPoker cluster. 
As soon as the poker client notices a network error it should connect to the gateway, receive a new game server address in a hand-off packet and reconnect to the new game server. What happens then is a little tricky as different types of reconnect scenarios need to be handled.&lt;/p&gt; &lt;p&gt;OpenPoker will handle the following reconnect scenarios:&lt;/p&gt; &lt;ol type="1"&gt;&lt;li&gt;The game server crashed &lt;/li&gt;&lt;li&gt;The client crashed or timed out due to a network error &lt;/li&gt;&lt;li&gt;The player is online on a different connection &lt;/li&gt;&lt;li&gt;The player is online on a different connection and is in a game&lt;/li&gt;&lt;/ol&gt; &lt;p&gt;The most common scenario will probably be a poker client that disconnected due to a network error. A less likely but still possible scenario is a client reconnecting from one computer while already playing at another.&lt;/p&gt; &lt;p&gt;Each OpenPoker game buffers packets sent to players and every reconnecting poker client will first receive all  the game packets since the game started before starting to receive packets as usual. OpenPoker uses TCP connections  so I don't need to worry about packet ordering – packets will simply arrive in proper order.&lt;/p&gt; &lt;p&gt;Every poker client connection is represented by two OpenPoker processes: the socket process and the actual player process. A visitor process with restricted functionality is used until the player logs in. Visitors cannot join games, for example. The socket process will be dead after a poker client disconnects while the player process will still be alive.&lt;/p&gt; &lt;p&gt;A player process can notice a dead socket when attempting to forward a game packet and should put itself into auto-play mode or fold the hand. The login code will check for the combination of a dead socket and live player process when reconnecting. 
The code to determine the condition looks like this:&lt;/p&gt; &lt;pre class="code"&gt;login({atomic, [Player]}, [_Nick, Pass|_] = Args)&lt;br /&gt; when is_record(Player, player) -&gt;&lt;br /&gt;   Player1 = Player#player {&lt;br /&gt;       socket = fix_pid(Player#player.socket),&lt;br /&gt;       pid = fix_pid(Player#player.pid)&lt;br /&gt;          },&lt;br /&gt;   Condition = check_player(Player1, [Pass],&lt;br /&gt;                [&lt;br /&gt;                 fun is_account_disabled/2,&lt;br /&gt;                 fun is_bad_password/2,&lt;br /&gt;                 fun is_player_busy/2,&lt;br /&gt;                 fun is_player_online/2,&lt;br /&gt;                 fun is_client_down/2,&lt;br /&gt;                 fun is_offline/2&lt;br /&gt;                ]),&lt;br /&gt;   ...&lt;br /&gt;&lt;/pre&gt;&lt;br /&gt;&lt;p&gt;whereas the conditions themselves will be determined by the following code:&lt;/p&gt; &lt;pre class="code"&gt;is_player_busy(Player, _) -&gt;&lt;br /&gt;   {Online, _} = is_player_online(Player, []),&lt;br /&gt;   Playing = Player#player.game /= none,&lt;br /&gt;   {Online and Playing, player_busy}.&lt;br /&gt;&lt;br /&gt;is_player_online(Player, _) -&gt;&lt;br /&gt;   SocketAlive = Player#player.socket /= none,&lt;br /&gt;   PlayerAlive = Player#player.pid /= none,&lt;br /&gt;   {SocketAlive and PlayerAlive, player_online}.&lt;br /&gt;&lt;br /&gt;is_client_down(Player, _) -&gt;&lt;br /&gt;   SocketDown = Player#player.socket == none,&lt;br /&gt;   PlayerAlive = Player#player.pid /= none,&lt;br /&gt;   {SocketDown and PlayerAlive, client_down}.&lt;br /&gt;&lt;br /&gt;is_offline(Player, _) -&gt;&lt;br /&gt;   SocketDown = Player#player.socket == none,&lt;br /&gt;   PlayerDown = Player#player.pid == none,&lt;br /&gt;   {SocketDown and PlayerDown, player_offline}.&lt;br /&gt;&lt;/pre&gt;&lt;br /&gt;&lt;p&gt;Notice that the first thing the login function does is to fix up dead process ids. 
This makes processing simple down the road and is accomplished with the following bits of code:&lt;/p&gt; &lt;pre class="code"&gt;fix_pid(Pid)&lt;br /&gt; when is_pid(Pid) -&gt;&lt;br /&gt;   case util:is_process_alive(Pid) of&lt;br /&gt;   true -&gt;&lt;br /&gt;       Pid;&lt;br /&gt;   _ -&gt;&lt;br /&gt;       none&lt;br /&gt;   end;&lt;br /&gt;&lt;br /&gt;fix_pid(Pid) -&gt;&lt;br /&gt;   Pid.&lt;br /&gt;&lt;/pre&gt;  &lt;p&gt;and&lt;/p&gt;  &lt;pre class="code"&gt;-module(util).&lt;br /&gt;&lt;br /&gt;&lt;p&gt;-export([is_process_alive/1]).&lt;/p&gt;&lt;br /&gt;&lt;br /&gt;&lt;p&gt;is_process_alive(Pid)&lt;br /&gt; when is_pid(Pid) -&gt;&lt;br /&gt;   rpc:call(node(Pid), erlang, is_process_alive, [Pid]).&lt;br /&gt;&lt;/p&gt;&lt;/pre&gt;&lt;br /&gt; &lt;p&gt;A process id in Erlang includes the id of the node where the process is running. &lt;strong&gt;is_pid(Pid)&lt;/strong&gt; tells me if its argument is a process id (pid)  but cannot tell me if the process is alive or dead. Erlang’s built-in&lt;strong&gt; erlang:is_process_alive(Pid)&lt;/strong&gt; tells me whether a local process (running on the same node) is  dead or alive. There's no variant of is_process_alive for checking remote nodes.&lt;/p&gt;  &lt;p&gt;Fortunately, I can use the &lt;a href="http://erlang.se/doc/doc-5.4.8/lib/kernel-2.10.9/doc/html/rpc.html"&gt;Erlang rpc facility&lt;/a&gt; together with &lt;strong&gt;node(pid)&lt;/strong&gt;  to call &lt;strong&gt;is_process_alive()&lt;/strong&gt; on the remote node. In fact, this will work just as well on the local node so the code above functions as a universal  distributed process checker.&lt;/p&gt;  &lt;p&gt;All that is left to do is to act on the various login conditions. 
In the simplest case where the player is offline I start a player process, connect the player to the socket and update the player record.&lt;/p&gt;  &lt;pre class="code"&gt;login(Player, player_offline, [Nick, _, Socket]) -&gt;&lt;br /&gt;   {ok, Pid} = player:start(Nick),&lt;br /&gt;   OID = gen_server:call(Pid, 'ID'),&lt;br /&gt;   gen_server:cast(Pid, {'SOCKET', Socket}),&lt;br /&gt;   Player1 = Player#player {&lt;br /&gt;       oid = OID,&lt;br /&gt;       pid = Pid,&lt;br /&gt;       socket = Socket&lt;br /&gt;          },&lt;br /&gt;   {Player1, {ok, Pid}}.&lt;br /&gt;&lt;/pre&gt;&lt;br /&gt; &lt;p&gt;Should the player login information not match I can return an error and increase the number of bad login attempts. If this number exceeds a predefined maximum I disable the account like this:&lt;/p&gt;  &lt;pre class="code"&gt;login(Player, bad_password, _) -&gt;&lt;br /&gt;   N = Player#player.login_errors + 1,&lt;br /&gt;   {atomic, MaxLoginErrors} =&lt;br /&gt;   db:get(cluster_config, 0, max_login_errors),&lt;br /&gt;   if&lt;br /&gt;   N &gt; MaxLoginErrors -&gt;&lt;br /&gt;       Player1 = Player#player {&lt;br /&gt;           disabled = true&lt;br /&gt;              },&lt;br /&gt;       {Player1, {error, ?ERR_ACCOUNT_DISABLED}};&lt;br /&gt;   true -&gt;&lt;br /&gt;       Player1 = Player#player {&lt;br /&gt;           login_errors = N&lt;br /&gt;              },&lt;br /&gt;       {Player1, {error, ?ERR_BAD_LOGIN}}&lt;br /&gt;   end;&lt;br /&gt;&lt;br /&gt;&lt;p&gt;login(Player, account_disabled, _) -&gt;&lt;br /&gt;   {Player, {error, ?ERR_ACCOUNT_DISABLED}};&lt;br /&gt;&lt;/p&gt;&lt;/pre&gt;&lt;br /&gt; &lt;p&gt;Logging out the player involves finding the player process id using their &lt;strong&gt;Object ID&lt;/strong&gt; (which is just a number), stopping the player process and  updating the player record in the database. 
This is accomplished by the following bit of code:&lt;/p&gt;  &lt;pre class="code"&gt;logout(OID) -&gt;&lt;br /&gt;   case db:find(player, OID) of&lt;br /&gt;   {atomic, [Player]} -&gt;&lt;br /&gt;       player:stop(Player#player.pid),&lt;br /&gt;       {atomic, ok} = db:set(player, OID,&lt;br /&gt;                 [{pid, none},&lt;br /&gt;                  {socket, none}]);&lt;br /&gt;   _ -&gt;&lt;br /&gt;       oops&lt;br /&gt;   end.&lt;br /&gt;&lt;/pre&gt;&lt;br /&gt; &lt;p&gt;With logout out of the way I can address the various reconnect conditions. If the player is online but idle, i.e. hanging out in the lobby or watching a game (drinking a Bud? Wazzup!) and is reconnecting from a different computer, I can just log them out and log them back in as if they were offline:&lt;/p&gt;  &lt;pre class="code"&gt;login(Player, player_online, Args) -&gt;&lt;br /&gt;   logout(Player#player.oid),&lt;br /&gt;   login(Player, player_offline, Args);&lt;br /&gt;&lt;/pre&gt;&lt;br /&gt; &lt;p&gt;If the player was idle when their poker client disconnected then all I need to do is replace the socket process id in the player record and tell the player process about the new socket.&lt;/p&gt;  &lt;pre class="code"&gt;login(Player, client_down, [_, _, Socket]) -&gt;&lt;br /&gt;   gen_server:cast(Player#player.pid, {'SOCKET', Socket}),&lt;br /&gt;   Player1 = Player#player {&lt;br /&gt;       socket = Socket&lt;br /&gt;          },&lt;br /&gt;   {Player1, {ok, Player#player.pid}};&lt;br /&gt;&lt;/pre&gt;&lt;br /&gt; &lt;p&gt;If the player was in a game then we run the code above &lt;strong&gt;and&lt;/strong&gt; tell the game to resend the event history. 
&lt;/p&gt;  &lt;pre class="code"&gt;login(Player, player_busy, Args) -&gt;&lt;br /&gt;   Temp = login(Player, client_down, Args),&lt;br /&gt;   cardgame:cast(Player#player.game,&lt;br /&gt;         {'RESEND UPDATES', Player#player.pid}),&lt;br /&gt;   Temp;&lt;br /&gt;&lt;/pre&gt;&lt;br /&gt; &lt;p&gt;Overall, a combination of a real-time replicating database, a poker client that knows to reconnect to a different game server and some crafty login code allows me to provide a high degree of fault tolerance transparently to the player.&lt;/p&gt;  &lt;h1&gt;Load balancing&lt;/h1&gt;  &lt;p&gt;I can build my OpenPoker cluster from as many game server nodes as I want . I might want to allocate, say, 5000 players per game server and spread the load among the active game servers in my cluster. I can add new game servers at any time and they will automatically make themselves available to accept new players.&lt;/p&gt;  &lt;p&gt;Gateway nodes spread the player load among the active game servers in the OpenPoker cluster. The job of a gateway node is to pick a random game server, ask it for the number of players connected and its address, host and port number where the game server is running. As soon as the gateway finds a game server where the number of players connected is less than the preset maximum it will return the address of that game server to the connected poker client and close the connection.&lt;/p&gt;  &lt;p&gt;There's absolutely no load on gateway nodes and connections to them are extremely short-lived. You can have any cheap box acting as your gateway node.&lt;/p&gt;  &lt;p&gt;Nodes should generally come at least in pairs so that if one node fails another one can take over. 
You would need a mechanism like &lt;a href="http://hacks.oreilly.com/pub/h/79"&gt;Round-robin DNS&lt;/a&gt; to employ more than a single gateway node.&lt;/p&gt;  &lt;p&gt;How do gateways learn about game servers?&lt;/p&gt;  &lt;p&gt;OpenPoker uses the Erlang &lt;a href="http://erlang.se/doc/doc-5.4.8/lib/kernel-2.10.9/doc/html/pg2.html"&gt;Distributed  Named Process Groups&lt;/a&gt; facility to group game servers. The group is globally visible on all nodes; this happens automatically. New game servers join the game server group and when a game server node goes down it's automatically deleted.&lt;/p&gt;  &lt;p&gt;This is what the code to find a game server with a maximum capacity of MaxPlayers looks like:&lt;/p&gt;  &lt;pre class="code"&gt;find_server(MaxPlayers) -&gt;&lt;br /&gt;   case pg2:get_closest_pid(?GAME_SERVERS) of&lt;br /&gt;   Pid when is_pid(Pid) -&gt;&lt;br /&gt;       {Time, {Host, Port}} = timer:tc(gen_server, call, [Pid, 'WHERE']),&lt;br /&gt;       Count = gen_server:call(Pid, 'USER COUNT'),&lt;br /&gt;       if&lt;br /&gt;       Count &amp;lt; MaxPlayers -&gt;&lt;br /&gt;           io:format("~s:~w: ~w players~n", [Host, Port, Count]),&lt;br /&gt;           {Host, Port};&lt;br /&gt;       true -&gt;&lt;br /&gt;           io:format("~s:~w is full...~n", [Host, Port]),&lt;br /&gt;           find_server(MaxPlayers)&lt;br /&gt;       end;&lt;br /&gt;   Any -&gt;&lt;br /&gt;       Any&lt;br /&gt;   end.&lt;br /&gt;&lt;/pre&gt;&lt;br /&gt; &lt;p&gt;&lt;b&gt;pg2:get_closest_pid()&lt;/b&gt; returns a random game server process id since a gateway node is not expected to run any game servers. If a process id of the game server is returned I ask the game server for its address (host and port) as well as the number of players connected. 
So long as the number of players connected is less than the maximum I return the game server address to the caller, otherwise I keep looking.&lt;/p&gt;  &lt;h1&gt;Multiple-outlet powerstrip middleware&lt;/h1&gt;  &lt;p&gt;OpenPoker is open source software and I have been pitching it to various poker vendors recently. All the vendors have the same problem with scalability and fault tolerance, even after several years of development. Some have recently finished major rewrites of their server software while others are just embarking on this journey. All of the vendors are heavily invested in their Java infrastructure and, understandably, do not want to switch to Erlang.&lt;/p&gt;  &lt;p&gt;Still, it sounds to me like there is a need to be filled. The more I think about it the more it looks like Erlang can still be used to provide a cost-efficient solution while keeping things simple and straightforward. I see this solution as a multiple-outlet electrical power strip, just like the one you are probably using right now.&lt;/p&gt;  &lt;p&gt;You write your game server as a simple socket-based server that uses a database backend. In fact, more likely than not this is how your game server is written now. Your game server is the standard electrical plug and multiple instances of your game server are plugged into my power outlets while players flow in through the other end.&lt;/p&gt;  &lt;p&gt;You supply the game servers and I provide you with scalability, load balancing, and fault tolerance. I keep players connected to the power strip and monitor your game servers, restarting them as needed. I switch your players to another game server when one goes down and you can plug in as many game servers as you like into my power outlets.&lt;/p&gt;  &lt;p&gt;The power strip middleware is a black box sitting between your players and your servers and likely won't even require any changes to your code. 
You will get all the benefits of a highly scalable, load-balanced, fault-tolerant solution while keeping your investment and modifying little of your existing infrastructure.&lt;/p&gt;  &lt;p&gt;You can write this middleware in Erlang today, run it on a Linux box with a kernel specially tuned for a high number of TCP connections and put this box in a demilitarized zone while keeping your game servers behind a firewall. Even if you don't, I suggest that you take a close look at Erlang today and think about using it to simplify your massively multiplayer server architectures. And I will be here to help!&lt;/p&gt;     &lt;/div&gt;&lt;div class="blogger-post-footer"&gt;&lt;img width='1' height='1' src='https://blogger.googleusercontent.com/tracker/33306823-80482515622944565?l=pylab.blogspot.com' alt='' /&gt;&lt;/div&gt;</content><link rel='replies' type='application/atom+xml' href='http://pylab.blogspot.com/feeds/80482515622944565/comments/default' title='Post Comments'/><link rel='replies' type='text/html' href='http://www.blogger.com/comment.g?blogID=33306823&amp;postID=80482515622944565' title='0 Comments'/><link rel='edit' type='application/atom+xml' href='http://www.blogger.com/feeds/33306823/posts/default/80482515622944565'/><link rel='self' type='application/atom+xml' href='http://www.blogger.com/feeds/33306823/posts/default/80482515622944565'/><link rel='alternate' type='text/html' href='http://pylab.blogspot.com/2007/03/openpoker-how-it-works.html' title='OpenPoker how it works'/><author><name>NextGenSearch</name><email>noreply@blogger.com</email><gd:image rel='http://schemas.google.com/g/2005#thumbnail' width='16' height='16' src='http://img2.blogblog.com/img/b16-rounded.gif'/></author><thr:total>0</thr:total></entry><entry><id>tag:blogger.com,1999:blog-33306823.post-1659596684977818967</id><published>2007-03-04T08:59:00.001-08:00</published><updated>2007-03-04T08:59:41.422-08:00</updated><category scheme='http://www.blogger.com/atom/ns#' term='bash 
linux'/><title type='text'>10 useful bash utils</title><content type='html'>&lt;span style="text-decoration: underline; font-weight: bold;font-size:300;color:yellow;"  &gt; &lt;u&gt;Bash Cures Cancer&lt;/u&gt; &lt;/span&gt;&lt;br /&gt; &lt;span style=";font-size:120;color:yellow;"  &gt; Learn the UNIX/Linux command line &lt;/span&gt;&lt;br /&gt;&lt;a href="http://bashcurescancer.com/"&gt;Home&lt;/a&gt;     &lt;a href="http://bashcurescancer.com/s/"&gt;Search&lt;/a&gt;     &lt;a href="http://bashcurescancer.com/man/"&gt;Man Pages&lt;/a&gt;     &lt;a href="http://feeds.feedburner.com/BashCuresCancer" onclick="javascript:urchinTracker('/feed/');"&gt;RSS Feed&lt;/a&gt;    Email:  &lt;a href="http://spamdefeator.com/"&gt;&lt;img alt="SpamDefeator" src="http://spamdefeator.com/u/5d0" border="0" height="15" width="180" /&gt;&lt;/a&gt;&lt;br /&gt;&lt;br /&gt;&lt;p&gt; &lt;span style="font-size:120;"&gt;Title: 10 Linux commands you've never used&lt;/span&gt;&lt;br /&gt;&lt;span style="font-size:120;"&gt;Published:  02-19-2007&lt;/span&gt; &lt;/p&gt;&lt;br /&gt;&lt;!-- BodyStart --&gt;  &lt;p&gt;It takes years maybe decades to master the commands available to you at the Linux shell prompt. Here are 10 that you will have never heard of or used. They are in no particular order. 
My favorite is mkfifo.&lt;/p&gt;   &lt;ol&gt; &lt;li&gt;&lt;a href="http://bashcurescancer.com/man/cmd/pgrep"&gt;pgrep&lt;/a&gt;, instead of: &lt;pre&gt;# ps -ef | egrep '^root ' | awk '{print $2}'&lt;br /&gt;1&lt;br /&gt;2&lt;br /&gt;3&lt;br /&gt;4&lt;br /&gt;5&lt;br /&gt;20&lt;br /&gt;21&lt;br /&gt;38&lt;br /&gt;39&lt;br /&gt;...&lt;br /&gt;&lt;/pre&gt;  &lt;p&gt;You can do this:&lt;/p&gt;  &lt;pre&gt;# pgrep -u root&lt;br /&gt;1&lt;br /&gt;2&lt;br /&gt;3&lt;br /&gt;4&lt;br /&gt;5&lt;br /&gt;20&lt;br /&gt;21&lt;br /&gt;38&lt;br /&gt;39&lt;br /&gt;...&lt;br /&gt;&lt;/pre&gt; &lt;/li&gt;&lt;li&gt;&lt;a href="http://bashcurescancer.com/man/cmd/pstree"&gt;pstree&lt;/a&gt;, list the processes in a tree format. This can be VERY useful when working with WebSphere or other heavy duty applications. &lt;pre&gt;# pstree&lt;br /&gt;init-+-acpid&lt;br /&gt;   |-atd&lt;br /&gt;   |-crond&lt;br /&gt;   |-cups-config-dae&lt;br /&gt;   |-cupsd&lt;br /&gt;   |-dbus-daemon-1&lt;br /&gt;   |-dhclient&lt;br /&gt;   |-events/0-+-aio/0&lt;br /&gt;   |          |-kacpid&lt;br /&gt;   |          |-kauditd&lt;br /&gt;   |          |-kblockd/0&lt;br /&gt;   |          |-khelper&lt;br /&gt;   |          |-kmirrord&lt;br /&gt;   |          `-2*[pdflush]&lt;br /&gt;   |-gpm&lt;br /&gt;   |-hald&lt;br /&gt;   |-khubd&lt;br /&gt;   |-2*[kjournald]&lt;br /&gt;   |-klogd&lt;br /&gt;   |-kseriod&lt;br /&gt;   |-ksoftirqd/0&lt;br /&gt;   |-kswapd0&lt;br /&gt;   |-login---bash&lt;br /&gt;   |-5*[mingetty]&lt;br /&gt;   |-portmap&lt;br /&gt;   |-rpc.idmapd&lt;br /&gt;   |-rpc.statd&lt;br /&gt;   |-2*[sendmail]&lt;br /&gt;   |-smartd&lt;br /&gt;   |-sshd---sshd---bash---pstree&lt;br /&gt;   |-syslogd&lt;br /&gt;   |-udevd&lt;br /&gt;   |-vsftpd&lt;br /&gt;   |-xfs&lt;br /&gt;   `-xinetd&lt;br /&gt;&lt;/pre&gt; &lt;/li&gt;&lt;li&gt;&lt;a href="http://bashcurescancer.com/man/cmd/bc"&gt;bc&lt;/a&gt; is an arbitrary precision calculator language. Which is great. 
I found it useful in that it can perform square root operations in shell scripts. &lt;a href="http://bashcurescancer.com/man/cmd/expr"&gt;expr&lt;/a&gt; does not support square roots. &lt;pre&gt;# ./sqrt&lt;br /&gt;Usage: sqrt number&lt;br /&gt;# ./sqrt 64&lt;br /&gt;8&lt;br /&gt;# ./sqrt 132112&lt;br /&gt;363&lt;br /&gt;# ./sqrt 1321121321&lt;br /&gt;36347&lt;br /&gt;&lt;/pre&gt;  &lt;p&gt;Here is the script:&lt;/p&gt;  &lt;pre&gt;# cat sqrt&lt;br /&gt;#!/bin/bash&lt;br /&gt;if [ $# -ne 1 ]&lt;br /&gt;then&lt;br /&gt;      echo 'Usage: sqrt number'&lt;br /&gt;      exit 1&lt;br /&gt;else&lt;br /&gt;      echo -e "sqrt($1)\nquit\n" | bc -q -i&lt;br /&gt;fi&lt;br /&gt;&lt;/pre&gt; &lt;/li&gt;&lt;li&gt;&lt;a href="http://bashcurescancer.com/man/cmd/split"&gt;split&lt;/a&gt;, have a large file that you need to split into smaller chunks? A mysqldump maybe? split is your command. Below I split a 250MB file into 2 megabyte chunks all starting with the prefix LF_. &lt;pre&gt;# ls -lh largefile&lt;br /&gt;-rw-r--r--  1 root root 251M Feb 19 10:27 largefile&lt;br /&gt;# split -b 2m largefile LF_&lt;br /&gt;# ls -lh LF_* | head -n 5&lt;br /&gt;-rw-r--r--  1 root root 2.0M Feb 19 10:29 LF_aa&lt;br /&gt;-rw-r--r--  1 root root 2.0M Feb 19 10:29 LF_ab&lt;br /&gt;-rw-r--r--  1 root root 2.0M Feb 19 10:29 LF_ac&lt;br /&gt;-rw-r--r--  1 root root 2.0M Feb 19 10:29 LF_ad&lt;br /&gt;-rw-r--r--  1 root root 2.0M Feb 19 10:29 LF_ae&lt;br /&gt;# ls -lh LF_* | wc -l&lt;br /&gt;126&lt;br /&gt;&lt;/pre&gt; &lt;/li&gt;&lt;li&gt;&lt;a href="http://bashcurescancer.com/man/cmd/nl"&gt;nl&lt;/a&gt; numbers lines. I had a script doing this for me for years until I found out about nl.  
&lt;pre&gt;# head wireless.h&lt;br /&gt;/*&lt;br /&gt;* This file define a set of standard wireless extensions&lt;br /&gt;*&lt;br /&gt;* Version :    20      17.2.06&lt;br /&gt;*&lt;br /&gt;* Authors :    Jean Tourrilhes - HPL&lt;br /&gt;* Copyright (c) 1997-2006 Jean Tourrilhes, All Rights Reserved.&lt;br /&gt;*/&lt;br /&gt;&lt;br /&gt;#ifndef _LINUX_WIRELESS_H&lt;br /&gt;# nl wireless.h | head&lt;br /&gt;   1  /*&lt;br /&gt;   2   * This file define a set of standard wireless extensions&lt;br /&gt;   3   *&lt;br /&gt;   4   * Version :    20      17.2.06&lt;br /&gt;   5   *&lt;br /&gt;   6   * Authors :    Jean Tourrilhes - HPL&lt;br /&gt;   7   * Copyright (c) 1997-2006 Jean Tourrilhes, All Rights Reserved.&lt;br /&gt;   8   */&lt;br /&gt;&lt;br /&gt;   9  #ifndef _LINUX_WIRELESS_H&lt;br /&gt;&lt;/pre&gt; &lt;/li&gt;&lt;li&gt;&lt;a href="http://bashcurescancer.com/man/cmd/mkfifo"&gt;mkfifo&lt;/a&gt; is the coolest one. Sure you know how to create a pipeline piping the output of grep to less or maybe even perl. But do you know how to make two commands communicate through a named pipe?&lt;br /&gt;&lt;br /&gt;&lt;p&gt;First let me create the pipe and start writing to it:&lt;/p&gt;  &lt;img alt="mkfifo pipe" title="Making the pipe and writing to it" class="screenshot" src="http://bashcurescancer.com/media/10-linux-commands-youve-never-used/mkfifo-write-to-pipe.png" height="357" width="640" /&gt; &lt;p&gt;Then read from it:&lt;/p&gt;  &lt;img alt="cat pipe" title="Reading from the pipe" class="screenshot" src="http://bashcurescancer.com/media/10-linux-commands-youve-never-used/read-from-pipe.png" height="360" width="642" /&gt;  &lt;/li&gt;&lt;li&gt;&lt;a href="http://bashcurescancer.com/man/cmd/ldd"&gt;ldd&lt;/a&gt;, want to know which Linux thread library java is linked to? 
&lt;pre&gt;# ldd /usr/java/jre1.5.0_11/bin/java&lt;br /&gt;      libpthread.so.0 =&gt; /lib/tls/libpthread.so.0 (0x00bd4000)&lt;br /&gt;      libdl.so.2 =&gt; /lib/libdl.so.2 (0x00b87000)&lt;br /&gt;      libc.so.6 =&gt; /lib/tls/libc.so.6 (0x00a5a000)&lt;br /&gt;      /lib/ld-linux.so.2 (0x00a3c000)&lt;br /&gt;&lt;/pre&gt; &lt;/li&gt;&lt;li&gt;&lt;a href="http://bashcurescancer.com/man/cmd/col"&gt;col&lt;/a&gt;, want to save man pages as plain text? &lt;pre&gt;# PAGER=cat&lt;br /&gt;# man less | col -b &gt; less.txt&lt;br /&gt;&lt;/pre&gt; &lt;/li&gt;&lt;li&gt;&lt;a href="http://bashcurescancer.com/man/cmd/xmlwf"&gt;xmlwf&lt;/a&gt;, need to know if an XML document is well formed? (A configuration file maybe..) &lt;pre&gt;# curl -s 'http://bashcurescancer.com' &gt; bcc.html&lt;br /&gt;# xmlwf bcc.html&lt;br /&gt;# perl -i -pe 's@&amp;lt;br/&amp;gt;@&amp;lt;br&amp;gt;@g' bcc.html&lt;br /&gt;# xmlwf bcc.html&lt;br /&gt;bcc.html:104:2: mismatched tag&lt;br /&gt;&lt;/pre&gt; &lt;/li&gt;&lt;li&gt;&lt;a href="http://bashcurescancer.com/man/cmd/lsof"&gt;lsof&lt;/a&gt; lists open files. You can do all kinds of cool things with this. 
Like find which ports are open: &lt;pre&gt;# lsof | grep TCP&lt;br /&gt;portmap    2587   rpc    4u     IPv4       5544                 TCP *:sunrpc (LISTEN)&lt;br /&gt;rpc.statd  2606  root    6u     IPv4       5585                 TCP *:668 (LISTEN)&lt;br /&gt;sshd       2788  root    3u     IPv6       5991                 TCP *:ssh (LISTEN)&lt;br /&gt;sendmail   2843  root    4u     IPv4       6160                 TCP badhd:smtp (LISTEN)&lt;br /&gt;vsftpd     9337  root    3u     IPv4      34949                 TCP *:ftp (LISTEN)&lt;br /&gt;cupsd     16459  root    0u     IPv4      41061                 TCP badhd:ipp (LISTEN)&lt;br /&gt;sshd      16892  root    3u     IPv6      61003                 TCP badhd.mshome.net:ssh-&gt;kontiki.mshome.net:4661 (ESTABLISHED)&lt;br /&gt;&lt;/pre&gt;  &lt;p&gt;&lt;span style="font-size:80;"&gt;Note: &lt;a href="http://www.openbsd101.com/"&gt;OpenBSD 101&lt;/a&gt; pointed out that "lsof -i TCP" is a better way to obtain this same information. Thanks!&lt;/span&gt;&lt;/p&gt;&lt;br /&gt;&lt;p&gt;Or find the number of open files a user has. Very important for running big applications like Oracle, DB2, or WebSphere:&lt;/p&gt;  &lt;pre&gt;# lsof | grep ' root ' | awk '{print $NF}' | sort | uniq | wc -l&lt;br /&gt;179&lt;br /&gt;&lt;/pre&gt; &lt;p&gt;&lt;span style="font-size:80;"&gt;Note: an anonymous commenter pointed out that you can replace sort | uniq with "sort -u". This is true, I forgot about the -u flag. 
Thanks!&lt;/span&gt;&lt;/p&gt; &lt;/li&gt; &lt;/ol&gt;&lt;div class="blogger-post-footer"&gt;&lt;img width='1' height='1' src='https://blogger.googleusercontent.com/tracker/33306823-1659596684977818967?l=pylab.blogspot.com' alt='' /&gt;&lt;/div&gt;</content><link rel='replies' type='application/atom+xml' href='http://pylab.blogspot.com/feeds/1659596684977818967/comments/default' title='Post Comments'/><link rel='replies' type='text/html' href='http://www.blogger.com/comment.g?blogID=33306823&amp;postID=1659596684977818967' title='0 Comments'/><link rel='edit' type='application/atom+xml' href='http://www.blogger.com/feeds/33306823/posts/default/1659596684977818967'/><link rel='self' type='application/atom+xml' href='http://www.blogger.com/feeds/33306823/posts/default/1659596684977818967'/><link rel='alternate' type='text/html' href='http://pylab.blogspot.com/2007/03/10-useful-bash-utils.html' title='10 useful bash utils'/><author><name>NextGenSearch</name><email>noreply@blogger.com</email><gd:image rel='http://schemas.google.com/g/2005#thumbnail' width='16' height='16' src='http://img2.blogblog.com/img/b16-rounded.gif'/></author><thr:total>0</thr:total></entry><entry><id>tag:blogger.com,1999:blog-33306823.post-5409192908992768177</id><published>2007-03-01T17:25:00.001-08:00</published><updated>2007-03-01T17:28:27.555-08:00</updated><category scheme='http://www.blogger.com/atom/ns#' term='sql javascript berkeley'/><title type='text'>SQL queries from javascript</title><content type='html'>&lt;a onclick="return top.js.OpenExtLink(window,event,this)" href="http://trimpath.com/project/wiki/TrimQuery" target="_blank"&gt;http://trimpath.com/project/wiki/TrimQuery&lt;/a&gt;&lt;br /&gt;that is really cool..&lt;br /&gt;if this is combined with berkeley db.. 
it'll be awesome&lt;div class="blogger-post-footer"&gt;&lt;img width='1' height='1' src='https://blogger.googleusercontent.com/tracker/33306823-5409192908992768177?l=pylab.blogspot.com' alt='' /&gt;&lt;/div&gt;</content><link rel='replies' type='application/atom+xml' href='http://pylab.blogspot.com/feeds/5409192908992768177/comments/default' title='Post Comments'/><link rel='replies' type='text/html' href='http://www.blogger.com/comment.g?blogID=33306823&amp;postID=5409192908992768177' title='0 Comments'/><link rel='edit' type='application/atom+xml' href='http://www.blogger.com/feeds/33306823/posts/default/5409192908992768177'/><link rel='self' type='application/atom+xml' href='http://www.blogger.com/feeds/33306823/posts/default/5409192908992768177'/><link rel='alternate' type='text/html' href='http://pylab.blogspot.com/2007/03/sql-queries-from-javascript.html' title='SQL queries from javascript'/><author><name>NextGenSearch</name><email>noreply@blogger.com</email><gd:image rel='http://schemas.google.com/g/2005#thumbnail' width='16' height='16' src='http://img2.blogblog.com/img/b16-rounded.gif'/></author><thr:total>0</thr:total></entry><entry><id>tag:blogger.com,1999:blog-33306823.post-5338943474852388184</id><published>2007-02-26T19:14:00.000-08:00</published><updated>2007-02-26T19:15:03.587-08:00</updated><category scheme='http://www.blogger.com/atom/ns#' term='python'/><title type='text'>How to do form.fill</title><content type='html'>Suppose you have a form like:&lt;br/&gt;&lt;br/&gt;form = web.form.Form(&lt;br/&gt;    web.form.Textbox('name', description='Name'),&lt;br/&gt;    web.form.Textbox('subject', description='Subject'),&lt;br/&gt;    web.form.Textarea('body', description='Message'),&lt;br/&gt;)&lt;br/&gt;&lt;br/&gt;You can pass a dictionary mapping values to form objects like:&lt;br/&gt;&lt;br/&gt;form.fill(&lt;br/&gt;    {&lt;br/&gt;        'subject':'This is my subject!',&lt;br/&gt;        'body':'This is the body!',&lt;br/&gt;     
}&lt;br/&gt;)&lt;br/&gt;&lt;br/&gt;and these values will fill your textboxes.&lt;br/&gt;&lt;br/&gt;&lt;div class="blogger-post-footer"&gt;&lt;img width='1' height='1' src='https://blogger.googleusercontent.com/tracker/33306823-5338943474852388184?l=pylab.blogspot.com' alt='' /&gt;&lt;/div&gt;</content><link rel='replies' type='application/atom+xml' href='http://pylab.blogspot.com/feeds/5338943474852388184/comments/default' title='Post Comments'/><link rel='replies' type='text/html' href='http://www.blogger.com/comment.g?blogID=33306823&amp;postID=5338943474852388184' title='0 Comments'/><link rel='edit' type='application/atom+xml' href='http://www.blogger.com/feeds/33306823/posts/default/5338943474852388184'/><link rel='self' type='application/atom+xml' href='http://www.blogger.com/feeds/33306823/posts/default/5338943474852388184'/><link rel='alternate' type='text/html' href='http://pylab.blogspot.com/2007/02/how-to-do-formfill.html' title='How to do form.fill'/><author><name>NextGenSearch</name><email>noreply@blogger.com</email><gd:image rel='http://schemas.google.com/g/2005#thumbnail' width='16' height='16' src='http://img2.blogblog.com/img/b16-rounded.gif'/></author><thr:total>0</thr:total></entry><entry><id>tag:blogger.com,1999:blog-33306823.post-2819139251967645093</id><published>2007-02-20T01:50:00.001-08:00</published><updated>2007-02-20T01:50:46.865-08:00</updated><category scheme='http://www.blogger.com/atom/ns#' term='admin'/><title type='text'>X-Sendfile for large file transfers</title><content type='html'>I read Lighttpd’s weblog (&lt;a href="http://blog.lighttpd.net/"&gt;Lighty’s Life&lt;/a&gt;) regularly and I remember Jan talking about &lt;tt&gt;&lt;a href="http://blog.lighttpd.net/articles/2006/07/02/x-sendfile"&gt;X-Sendfile&lt;/a&gt;&lt;/tt&gt;. I thought it was interesting, but never really thought about using it…. Until today! 
&lt;p&gt;Basically, if you have a Ruby on Rails (or other environment) page that transfers a really massive file to the client, you should use X-Sendfile.&lt;/p&gt; &lt;p&gt;Here is what you’ll need:&lt;/p&gt; &lt;p&gt;*Lighttpd Configuration*&lt;/p&gt; &lt;p&gt;To the FastCGI section of your lighty config, along with host, port, etc., add &lt;tt&gt;“allow-x-send-file” =&gt; “enable”&lt;/tt&gt;&lt;/p&gt; &lt;pre&gt;&lt;code&gt;&lt;br /&gt;fastcgi.server = ( ".fcgi" =&gt;&lt;br /&gt; (  "server_1"  =&gt; ( "host" =&gt; "10.0.1.1", "port" =&gt; 8000, "allow-x-send-file" =&gt; "enable" )&lt;br /&gt;                        )&lt;br /&gt;&lt;/code&gt; &lt;/pre&gt;&lt;div class="blogger-post-footer"&gt;&lt;img width='1' height='1' src='https://blogger.googleusercontent.com/tracker/33306823-2819139251967645093?l=pylab.blogspot.com' alt='' /&gt;&lt;/div&gt;</content><link rel='replies' type='application/atom+xml' href='http://pylab.blogspot.com/feeds/2819139251967645093/comments/default' title='Post Comments'/><link rel='replies' type='text/html' href='http://www.blogger.com/comment.g?blogID=33306823&amp;postID=2819139251967645093' title='0 Comments'/><link rel='edit' type='application/atom+xml' href='http://www.blogger.com/feeds/33306823/posts/default/2819139251967645093'/><link rel='self' type='application/atom+xml' href='http://www.blogger.com/feeds/33306823/posts/default/2819139251967645093'/><link rel='alternate' type='text/html' href='http://pylab.blogspot.com/2007/02/x-sendfile-for-large-file-transfers.html' title='X-Sendfile for large file transfers'/><author><name>NextGenSearch</name><email>noreply@blogger.com</email><gd:image rel='http://schemas.google.com/g/2005#thumbnail' width='16' height='16' 
src='http://img2.blogblog.com/img/b16-rounded.gif'/></author><thr:total>0</thr:total></entry><entry><id>tag:blogger.com,1999:blog-33306823.post-3259173182738637586</id><published>2007-02-19T19:47:00.000-08:00</published><updated>2007-02-19T19:52:16.717-08:00</updated><category scheme='http://www.blogger.com/atom/ns#' term='postgres path postgis'/><title type='text'>Find shortest distance path postgis postgres linedata</title><content type='html'>You could consider using pgRouting (http://www.postlbs.org/) .&lt;br /&gt;&lt;br /&gt;This extension to Postgres/PostGIS needs some topology and topology support for PostGIS is still in its infancy and barely documented… :-(&lt;br /&gt;&lt;br /&gt;&lt;br /&gt;&lt;br /&gt;Now to implement ‘topology’ the *easy* way… ;-)&lt;br /&gt;&lt;br /&gt;Use OpenJump! (http://openjump.org)&lt;br /&gt;&lt;br /&gt;There’s a tool there called ‘Planar Graph’…&lt;br /&gt;&lt;br /&gt;You can use it to get every line segment documented (start and end nodes for each line).&lt;br /&gt;&lt;br /&gt;Add a ‘length’ field to your dataset.&lt;br /&gt;&lt;br /&gt;OpenJump also has a tool to calculate areas and lengths… ;-)&lt;br /&gt;&lt;br /&gt;&lt;br /&gt;&lt;br /&gt;Save your dataset from OpenJump into PostGIS.&lt;br /&gt;&lt;br /&gt;The ‘length’ field acts as an initial cost for turning….&lt;br /&gt;&lt;br /&gt;&lt;br /&gt;&lt;br /&gt;Then use the shortest_path() function from pgRouting and you’re on your way!&lt;br /&gt;&lt;br /&gt;&lt;br /&gt;&lt;br /&gt;Also heard from the developers of pgRouting that support for turn restrictions is somewhere in the near future… ;-)&lt;br /&gt;&lt;br /&gt;&lt;br /&gt;&lt;br /&gt;(OJ People → I’m posting this to the list as I think it’s useful ;-) )&lt;br /&gt;&lt;br /&gt;&lt;br /&gt;&lt;br /&gt;HTH,&lt;br /&gt;&lt;br /&gt;Pedro Doria Meunier&lt;div class="blogger-post-footer"&gt;&lt;img width='1' height='1' src='https://blogger.googleusercontent.com/tracker/33306823-3259173182738637586?l=pylab.blogspot.com' alt='' 
/&gt;&lt;/div&gt;</content><link rel='replies' type='application/atom+xml' href='http://pylab.blogspot.com/feeds/3259173182738637586/comments/default' title='Post Comments'/><link rel='replies' type='text/html' href='http://www.blogger.com/comment.g?blogID=33306823&amp;postID=3259173182738637586' title='1 Comments'/><link rel='edit' type='application/atom+xml' href='http://www.blogger.com/feeds/33306823/posts/default/3259173182738637586'/><link rel='self' type='application/atom+xml' href='http://www.blogger.com/feeds/33306823/posts/default/3259173182738637586'/><link rel='alternate' type='text/html' href='http://pylab.blogspot.com/2007/02/find-shortest-distance-path-postgis.html' title='Find shortest distance path postgis postgres linedata'/><author><name>NextGenSearch</name><email>noreply@blogger.com</email><gd:image rel='http://schemas.google.com/g/2005#thumbnail' width='16' height='16' src='http://img2.blogblog.com/img/b16-rounded.gif'/></author><thr:total>1</thr:total></entry><entry><id>tag:blogger.com,1999:blog-33306823.post-6329803094375403796</id><published>2007-02-18T19:21:00.000-08:00</published><updated>2007-02-18T19:22:44.276-08:00</updated><category scheme='http://www.blogger.com/atom/ns#' term='postgres'/><title type='text'>Making all columns in a table lower case</title><content type='html'>psql -c "\d &lt;span style="font-weight: bold;"&gt;tablename&lt;/span&gt;" &gt; 1.txt&lt;br /&gt;get the table structure&lt;br /&gt;cat 1.txt | awk '{print $1}' &gt; 2.txt&lt;br /&gt;get the 1st word (column name)&lt;br /&gt;for x in `cat 2.txt`; do&lt;br /&gt;    echo "alter table  &lt;span style="font-weight: bold;"&gt;tablename&lt;/span&gt; rename \"$x\" to $x;" &gt;&gt; 3.txt&lt;br /&gt;done&lt;br /&gt;build a file of sql commands to run&lt;br /&gt;then, paste these into psql&lt;div class="blogger-post-footer"&gt;&lt;img width='1' height='1' src='https://blogger.googleusercontent.com/tracker/33306823-6329803094375403796?l=pylab.blogspot.com' alt='' 
/&gt;&lt;/div&gt;</content><link rel='replies' type='application/atom+xml' href='http://pylab.blogspot.com/feeds/6329803094375403796/comments/default' title='Post Comments'/><link rel='replies' type='text/html' href='http://www.blogger.com/comment.g?blogID=33306823&amp;postID=6329803094375403796' title='0 Comments'/><link rel='edit' type='application/atom+xml' href='http://www.blogger.com/feeds/33306823/posts/default/6329803094375403796'/><link rel='self' type='application/atom+xml' href='http://www.blogger.com/feeds/33306823/posts/default/6329803094375403796'/><link rel='alternate' type='text/html' href='http://pylab.blogspot.com/2007/02/making-all-columns-in-table-lower-case.html' title='Making all columns in a table lower case'/><author><name>NextGenSearch</name><email>noreply@blogger.com</email><gd:image rel='http://schemas.google.com/g/2005#thumbnail' width='16' height='16' src='http://img2.blogblog.com/img/b16-rounded.gif'/></author><thr:total>0</thr:total></entry><entry><id>tag:blogger.com,1999:blog-33306823.post-3122108252578607490</id><published>2007-02-18T09:59:00.000-08:00</published><updated>2007-02-18T10:02:41.078-08:00</updated><title type='text'>import zcta zip data to postgres postgis</title><content type='html'>&lt;p&gt;Often you will receive data in a non-spatial form such as comma delimited data with latitude and longitude fields. To take full advantage of PostGIS spatial abilities, you will want to create geometry fields in your new table and update that field using the longitude latitude fields you have available. &lt;/p&gt;  &lt;p&gt;&lt;b&gt;General Note:&lt;/b&gt; All the command statements that follow should be run from the &lt;b&gt;PgAdminIII Tools - Query Tool&lt;/b&gt; or any other PostGreSQL Administrative tool you have available.  If you are a command line freak - you can use the &lt;b&gt;psql&lt;/b&gt; command line tool packaged with PostGreSQL. 
&lt;/p&gt; &lt;h1&gt;Getting the data&lt;/h1&gt; &lt;p&gt;For this exercise, we will use US zip code tabulation areas instead of just Boston data. The techniques here will apply to any data you get actually. &lt;/p&gt;  &lt;p&gt; &lt;a href="http://www.census.gov/geo/www/gazetteer/places2k.html" target="_blank"&gt;First step is to download the data from US Census.&lt;/a&gt; http://www.census.gov/geo/www/gazetteer/places2k.html &lt;/p&gt;  &lt;h1&gt;Importing the Data into PostGreSQL&lt;/h1&gt; &lt;p&gt;PostGreSQL comes with a COPY function that allows you to import data from a delimited text file. Since the ZCTAs data is provided in fixed-width format, we can't import it easily without first converting it to a delimited such as the default tab-delimited format that COPY works with. Similarly for data in other formats such as DBF, you'll either want to convert it to delimited using tools such as excel, use a third party tool that will import from one format to another, or one of my favorite tools Microsoft Access that allows you to link any tables or do a straight import and export to any ODBC compliant database such as PostGreSQL. &lt;/p&gt; &lt;h2&gt;Create the table to import to&lt;/h2&gt; &lt;p&gt;First you will need to create the table in Postgres. 
You want to make sure the order of the fields is in the same order as the data you are importing.&lt;/p&gt; &lt;pre&gt;&lt;code&gt;&lt;br /&gt;CREATE TABLE zctas&lt;br /&gt;(&lt;br /&gt; state char(2),&lt;br /&gt; zcta char(5),&lt;br /&gt; junk varchar(100),&lt;br /&gt; population_tot int8,&lt;br /&gt; housing_tot int8,&lt;br /&gt; water_area_meter float8,&lt;br /&gt; land_area_meter float8,&lt;br /&gt; water_area_mile float8,&lt;br /&gt; land_area_mile float8,&lt;br /&gt; latitude float8,&lt;br /&gt; longitude float8&lt;br /&gt;)&lt;br /&gt;WITHOUT OIDS;&lt;br /&gt;&lt;/code&gt; &lt;/pre&gt; &lt;h2&gt;Convert from Fixed-width to Tab-Delimited&lt;/h2&gt; &lt;p&gt;For this part of the exercise, I'm going to use Microsoft Excel because it has a nice wizard for dealing with fixed-width and a lot of windows users have it already. If you open the zcta file in Excel, it should launch the Text Import Wizard. MS Access has a similarly nice wizard and can deal with files larger than Excel's limit of roughly 65,000 rows. Note there are trillions of ways to do this step so I'm not going to bother going over the other ways. For non-MS Office users other office suites such as Open-Office probably have similar functionality. &lt;/p&gt; &lt;ol&gt;&lt;li&gt;Open the file in Excel.&lt;/li&gt;&lt;li&gt;Import Text Wizard should launch automatically and have Fixed-Width as an option&lt;/li&gt;&lt;li&gt;Look at the &lt;a href="http://www.census.gov/geo/www/gazetteer/places2k.html#zcta" target="_blank"&gt;ZCTA table layout spec http://www.census.gov/geo/www/gazetteer/places2k.html#zcta&lt;/a&gt; and set your breakouts the same as specified.  
&lt;b&gt;For the above I broke out the Name field further into first 5 for zcta and the rest for a junk field.&lt;/b&gt;&lt;/li&gt;&lt;li&gt;Next File-&gt;Save As -&gt;Text (Tab delimited)(*.txt) -give it name of zcta5.tab&lt;/li&gt;&lt;li&gt;Copy the file to somewhere on your PostGreSQL server.&lt;/li&gt;&lt;h2&gt;The COPY command&lt;/h2&gt;&lt;p&gt;Now copy the data into the table using the COPY command. &lt;b&gt;Note the Copy command works using the PostGreSQL service so the file location must be specified relative to the Server.&lt;/b&gt;&lt;/p&gt;&lt;pre&gt;&lt;code&gt;&lt;br /&gt;    COPY zctas FROM 'C:/Downloads/GISData/zcta5.tab';&lt;br /&gt;&lt;/code&gt; &lt;/pre&gt;&lt;h1&gt;Creating and Populating the Geometry Field&lt;/h1&gt;&lt;h2&gt;Create the Geometry Field&lt;/h2&gt;&lt;p&gt;To create the Geometry field, use the AddGeometryColumn opengis function. This will add a geometry field to the specified table as well as adding a record to the geometry_columns meta table and creating useful constraints on the new field. A summary of the function can be found &lt;a href="http://postgis.refractions.net/docs/ch06.html#id2526109" target="_blank"&gt;here&lt;/a&gt; http://postgis.refractions.net/docs/ch06.html#id2526109. &lt;/p&gt;&lt;code&gt; SELECT AddGeometryColumn( 'public', 'zctas', 'thepoint_lonlat', 4269, 'POINT', 2 ); &lt;/code&gt;&lt;p&gt;The above code will create a geometry column named &lt;i&gt;thepoint_lonlat&lt;/i&gt; in the table zctas that validates to make sure the inputs are 2-dimensional points in SRID 4269 (NAD83 longlat). 
&lt;/p&gt;&lt;h2&gt;Populate the Geometry Field using the Longitude and Latitude fields&lt;/h2&gt;&lt;pre&gt;&lt;code&gt;&lt;br /&gt;UPDATE zctas&lt;br /&gt;       SET thepoint_lonlat = PointFromText('POINT(' || longitude || ' ' || latitude || ')',4269)&lt;br /&gt;&lt;/code&gt; &lt;/pre&gt;&lt;p&gt;The above code will generate a Text representation of a point and convert this representation to a PostGis geometry object of spatial reference SRID 4269. &lt;/p&gt;&lt;p&gt;There are a couple of things I would like to point out that may not be apparently clear to people not familiar with PostGreSQL or PostGis&lt;/p&gt;&lt;ul&gt;&lt;li&gt; || is a string concatenator. It is actually the ANSI-standard way of concatenating strings together. In MySQL you would do this using the CONCAT function and in Microsoft SQL Server you would use +. Oracle also uses ||. So what the inner part of the code would do is to generate something that looks like &lt;b&gt;POINT(-97.014256 38.959448)&lt;/b&gt;.&lt;/li&gt;&lt;li&gt;You can't just put any arbitrary SRID in there and expect the system to magically transform to that. The SRID you specify has to be the reference system that your text representation is in. &lt;/li&gt;&lt;/ul&gt;&lt;h2&gt;Transforming to Another spatial reference system&lt;/h2&gt;&lt;p&gt;The above is great if you want your geometry in longlat spatial reference system. In many cases, longlat is not terribly useful. For example if you want to do distance queries with your data, you don't want your distance returned back in longlat. You want it in a metric that you normally measure things in. 
&lt;/p&gt;&lt;p&gt;In the code below, we will create a new geometry field that holds points in the &lt;b&gt;WGS 84 North Meter&lt;/b&gt; reference system and then updates that field accordingly.&lt;/p&gt;&lt;pre&gt;&lt;code&gt;&lt;br /&gt;SELECT AddGeometryColumn( 'public', 'zctas', 'thepoint_meter', 32661, 'POINT', 2 );&lt;br /&gt;&lt;br /&gt;UPDATE zctas&lt;br /&gt;SET thepoint_meter = transform(PointFromText('POINT(' || longitude || ' ' || latitude || ')',4269),32661) ;&lt;br /&gt;&lt;/code&gt; &lt;/pre&gt;&lt;h1&gt;Index your spatial fields&lt;/h1&gt;&lt;p&gt;One of the number one reasons for poor query performance is lack of attention to indexes. Putting in an index can make as much as a 100 fold difference in query speed depending on how many records you have in the table. For large updates and imports, you should put your indexes in after the load, because while indexes help query speed, updates against indexed fields can be very slow because they need to create index records for the updated/inserted data. In the below, we will be putting in GIST indexes against our spatial fields. &lt;/p&gt;&lt;pre&gt;&lt;code&gt;&lt;br /&gt;CREATE INDEX idx_zctas_thepoint_lonlat ON zctas&lt;br /&gt; USING GIST (thepoint_lonlat);&lt;br /&gt;&lt;br /&gt;CREATE INDEX idx_zctas_thepoint_meter ON zctas&lt;br /&gt; USING GIST (thepoint_meter);&lt;br /&gt;&lt;br /&gt;ALTER TABLE zctas ALTER COLUMN thepoint_meter SET NOT NULL;&lt;br /&gt;CLUSTER idx_zctas_thepoint_meter ON zctas;&lt;br /&gt;&lt;br /&gt;VACUUM ANALYZE zctas;&lt;br /&gt;&lt;/code&gt; &lt;/pre&gt;&lt;p&gt;In the above after we create the indexes, we put in a constraint to not allow nulls in the &lt;b&gt;thepoint_meter&lt;/b&gt; field. The not null constraint is required for clustering since as of now, clustering is not allowed on gist indexes that have null values. Next we cluster on this index. Clustering basically physically reorders the table in the order of the index. 
In general spatial queries are much slower than attribute based queries, so if you do a fair amount of spatial queries, you get a huge gain. &lt;/p&gt;&lt;p&gt;In the above we vacuum analyze the table to ensure that index statistics are updated for our table.&lt;/p&gt;&lt;/ol&gt;&lt;div class="blogger-post-footer"&gt;&lt;img width='1' height='1' src='https://blogger.googleusercontent.com/tracker/33306823-3122108252578607490?l=pylab.blogspot.com' alt='' /&gt;&lt;/div&gt;</content><link rel='replies' type='application/atom+xml' href='http://pylab.blogspot.com/feeds/3122108252578607490/comments/default' title='Post Comments'/><link rel='replies' type='text/html' href='http://www.blogger.com/comment.g?blogID=33306823&amp;postID=3122108252578607490' title='0 Comments'/><link rel='edit' type='application/atom+xml' href='http://www.blogger.com/feeds/33306823/posts/default/3122108252578607490'/><link rel='self' type='application/atom+xml' href='http://www.blogger.com/feeds/33306823/posts/default/3122108252578607490'/><link rel='alternate' type='text/html' href='http://pylab.blogspot.com/2007/02/import-zcta-zip-data-to-postgres.html' title='import zcta zip data to postgres postgis'/><author><name>NextGenSearch</name><email>noreply@blogger.com</email><gd:image rel='http://schemas.google.com/g/2005#thumbnail' width='16' height='16' src='http://img2.blogblog.com/img/b16-rounded.gif'/></author><thr:total>0</thr:total></entry><entry><id>tag:blogger.com,1999:blog-33306823.post-615836542962167060</id><published>2007-02-16T12:21:00.000-08:00</published><updated>2007-02-16T12:23:17.458-08:00</updated><category scheme='http://www.blogger.com/atom/ns#' term='python'/><title type='text'>urlparse(url, scheme='', allow_fragments=1)</title><content type='html'>this is what we should use    urlparse(url, scheme='', allow_fragments=1)&lt;br /&gt;        Parse a URL into 6 components:&lt;br /&gt;        
&amp;lt;scheme&amp;gt;://&amp;lt;netloc&amp;gt;/&amp;lt;path&amp;gt;;&amp;lt;params&amp;gt;?&amp;lt;query&amp;gt;#&amp;lt;fragment&amp;gt;&lt;br /&gt;        Return a 6-tuple: (scheme, netloc, path, params, query, fragment).&lt;br /&gt;        Note that we don't break the components up in smaller bits&lt;br /&gt;        (e.g. netloc is a single string) and we don't expand % escapes.&lt;div class="blogger-post-footer"&gt;&lt;img width='1' height='1' src='https://blogger.googleusercontent.com/tracker/33306823-615836542962167060?l=pylab.blogspot.com' alt='' /&gt;&lt;/div&gt;</content><link rel='replies' type='application/atom+xml' href='http://pylab.blogspot.com/feeds/615836542962167060/comments/default' title='Post Comments'/><link rel='replies' type='text/html' href='http://www.blogger.com/comment.g?blogID=33306823&amp;postID=615836542962167060' title='0 Comments'/><link rel='edit' type='application/atom+xml' href='http://www.blogger.com/feeds/33306823/posts/default/615836542962167060'/><link rel='self' type='application/atom+xml' href='http://www.blogger.com/feeds/33306823/posts/default/615836542962167060'/><link rel='alternate' type='text/html' href='http://pylab.blogspot.com/2007/02/urlparseurl-scheme-allowfragments1.html' title='urlparse(url, scheme=&apos;&apos;, allow_fragments=1)'/><author><name>NextGenSearch</name><email>noreply@blogger.com</email><gd:image rel='http://schemas.google.com/g/2005#thumbnail' width='16' height='16' src='http://img2.blogblog.com/img/b16-rounded.gif'/></author><thr:total>0</thr:total></entry><entry><id>tag:blogger.com,1999:blog-33306823.post-8886868919867755317</id><published>2007-02-14T21:09:00.000-08:00</published><updated>2007-02-14T21:10:12.143-08:00</updated><category scheme='http://www.blogger.com/atom/ns#' term='postgres'/><title type='text'>pgsql users, and schemas</title><content type='html'>so have you discovered pgsql users, and schemas yet&lt;br /&gt;psql -U postgres &lt;br /&gt;psql&gt; create user newuser password 'newuser';&lt;br /&gt;psql&gt; create schema 
newuser authorization newuser;&lt;br /&gt;psql&gt; \q&lt;br /&gt;# psql -U newuser &lt;br /&gt;psql&gt; create table foo();&lt;br /&gt;psql&gt; \d&lt;br /&gt;the table is then owned by newuser user, in its own schema (like a namespace). this is somewhat how oracle user/schema owned tables are done.&lt;br /&gt;this lets you have one database and then have many users within that database, each in their own schema.&lt;br /&gt;this is same effect for having a different database for each project i guess.&lt;div class="blogger-post-footer"&gt;&lt;img width='1' height='1' src='https://blogger.googleusercontent.com/tracker/33306823-8886868919867755317?l=pylab.blogspot.com' alt='' /&gt;&lt;/div&gt;</content><link rel='replies' type='application/atom+xml' href='http://pylab.blogspot.com/feeds/8886868919867755317/comments/default' title='Post Comments'/><link rel='replies' type='text/html' href='http://www.blogger.com/comment.g?blogID=33306823&amp;postID=8886868919867755317' title='0 Comments'/><link rel='edit' type='application/atom+xml' href='http://www.blogger.com/feeds/33306823/posts/default/8886868919867755317'/><link rel='self' type='application/atom+xml' href='http://www.blogger.com/feeds/33306823/posts/default/8886868919867755317'/><link rel='alternate' type='text/html' href='http://pylab.blogspot.com/2007/02/pgsql-users-and-schemas.html' title='pgsql users, and schemas'/><author><name>NextGenSearch</name><email>noreply@blogger.com</email><gd:image rel='http://schemas.google.com/g/2005#thumbnail' width='16' height='16' src='http://img2.blogblog.com/img/b16-rounded.gif'/></author><thr:total>0</thr:total></entry><entry><id>tag:blogger.com,1999:blog-33306823.post-322318658274324607</id><published>2007-02-14T11:29:00.000-08:00</published><updated>2007-02-14T11:32:21.935-08:00</updated><category scheme='http://www.blogger.com/atom/ns#' term='postgres'/><title type='text'>Adding a new id column Primary key for an existing table</title><content type='html'>10k rows. 
varchar types, no indexes. takes a while it seems.&lt;br /&gt;ok j00 ready?&lt;br /&gt;this is what i did:&lt;br /&gt;&lt;span style="font-weight: bold;"&gt;alter table localeze_amacai_business add id integer;&lt;/span&gt;&lt;br /&gt;&lt;span style="font-weight: bold;"&gt;create sequence localeze_amacai_business_id_seq;&lt;/span&gt;&lt;br /&gt;&lt;span style="font-weight: bold;"&gt;create or replace function assign_localize_pk ()&lt;/span&gt;&lt;br /&gt;&lt;span style="font-weight: bold;"&gt;returns integer as $_$&lt;/span&gt;&lt;br /&gt;&lt;span style="font-weight: bold;"&gt;DECLARE&lt;/span&gt;&lt;br /&gt;&lt;span style="font-weight: bold;"&gt; _id        integer;&lt;/span&gt;&lt;br /&gt;&lt;span style="font-weight: bold;"&gt; _count    integer;&lt;/span&gt;&lt;br /&gt;&lt;span style="font-weight: bold;"&gt;_row        record; &lt;/span&gt;&lt;br /&gt;&lt;span style="font-weight: bold;"&gt;BEGIN&lt;/span&gt;&lt;br /&gt;   &lt;br /&gt;&lt;span style="font-weight: bold;"&gt;    for _row in select * from localeze_amacai_business LOOP&lt;/span&gt;&lt;br /&gt;&lt;span style="font-weight: bold;"&gt;        select into _id nextval('localeze_amacai_business_id_seq');&lt;/span&gt;&lt;br /&gt;&lt;span style="font-weight: bold;"&gt;        update localeze_amacai_business&lt;/span&gt;&lt;br /&gt;&lt;span style="font-weight: bold;"&gt;        set id = _id&lt;/span&gt;&lt;br /&gt;&lt;span style="font-weight: bold;"&gt;        where "PERSISTENTRECORDID" = _row."PERSISTENTRECORDID";&lt;/span&gt;&lt;br /&gt;&lt;span style="font-weight: bold;"&gt;    END LOOP;&lt;/span&gt;&lt;br /&gt;&lt;span style="font-weight: bold;"&gt;    return _count;&lt;/span&gt;&lt;br /&gt;&lt;span style="font-weight: bold;"&gt;END;&lt;/span&gt;&lt;br /&gt;&lt;span style="font-weight: bold;"&gt;$_$ language plpgsql;&lt;/span&gt;&lt;br /&gt;&lt;span style="font-weight: bold;"&gt;select assign_localize_pk();&lt;/span&gt;&lt;br /&gt;&lt;span style="font-weight: bold;"&gt;drop function 
assign_localize_pk();&lt;/span&gt;&lt;br /&gt;&lt;span style="font-weight: bold;"&gt;alter table localeze_amacai_business alter id set not null;&lt;/span&gt;&lt;br /&gt;&lt;span style="font-weight: bold;"&gt;alter table localeze_amacai_business add constraint localeze_amacai_business_pk primary key (id);&lt;/span&gt;&lt;br /&gt;&lt;br /&gt;&lt;br /&gt;&lt;br /&gt;but how 2 set that sequence to this new table&lt;br /&gt;11:18:30 am&lt;br /&gt;Travis&lt;br /&gt;so now it was built using the sequence&lt;br /&gt;&lt;span style="font-weight: bold;"&gt; select last_value from localeze_amacai_business_id_seq&lt;/span&gt;&lt;br /&gt;coup-# ;&lt;br /&gt;last_value&lt;br /&gt;------------&lt;br /&gt;     9258&lt;br /&gt;oh, thats e-z&lt;br /&gt;just:&lt;br /&gt;&lt;span style="font-weight: bold;"&gt;alter table localeze_amacai_business alter id set default nextval('localeze_amacai_business_id_seq');&lt;/span&gt;&lt;br /&gt;so now new inserts will invoke the sequence and you dont have to specify the id value&lt;br /&gt;&lt;br /&gt;or you could always do&lt;br /&gt;&lt;span style="font-weight: bold;"&gt;select into _id nextval('localeze_amacai_business_id_seq');&lt;/span&gt;&lt;br /&gt;&lt;span style="font-weight: bold;"&gt;insert into. ... (id, ...) 
values (_id, ..)&lt;/span&gt;&lt;br /&gt;&lt;br /&gt;table&lt;br /&gt;i do not understand&lt;br /&gt;i do not want to specify id values&lt;br /&gt;11:20:33 am&lt;br /&gt;Travis&lt;br /&gt;i mod the existing table , added that "id" column to it&lt;br /&gt;and it defaults to the sequence now&lt;br /&gt;so how do you relate to the table if you dont care what its id is?&lt;div class="blogger-post-footer"&gt;&lt;img width='1' height='1' src='https://blogger.googleusercontent.com/tracker/33306823-322318658274324607?l=pylab.blogspot.com' alt='' /&gt;&lt;/div&gt;</content><link rel='replies' type='application/atom+xml' href='http://pylab.blogspot.com/feeds/322318658274324607/comments/default' title='Post Comments'/><link rel='replies' type='text/html' href='http://www.blogger.com/comment.g?blogID=33306823&amp;postID=322318658274324607' title='0 Comments'/><link rel='edit' type='application/atom+xml' href='http://www.blogger.com/feeds/33306823/posts/default/322318658274324607'/><link rel='self' type='application/atom+xml' href='http://www.blogger.com/feeds/33306823/posts/default/322318658274324607'/><link rel='alternate' type='text/html' href='http://pylab.blogspot.com/2007/02/adding-new-id-column-primary-key-for.html' title='Adding a new id column Primary key for an existing table'/><author><name>NextGenSearch</name><email>noreply@blogger.com</email><gd:image rel='http://schemas.google.com/g/2005#thumbnail' width='16' height='16' src='http://img2.blogblog.com/img/b16-rounded.gif'/></author><thr:total>0</thr:total></entry><entry><id>tag:blogger.com,1999:blog-33306823.post-3261788714654673159</id><published>2007-02-14T11:04:00.000-08:00</published><updated>2007-02-14T11:05:23.052-08:00</updated><category scheme='http://www.blogger.com/atom/ns#' term='postgres'/><title type='text'>autoincrement sequence id</title><content type='html'>postgresql now has the insert into .. 
working&lt;br /&gt;so create table2 table with all the columns that table1 (original has)&lt;br /&gt;hm, actually select into needs the table to not exist&lt;br /&gt;i guess it is possible in postgresql too, wher you can have the original table, and a new table with the id pk column and then make  a plpgsql function that &lt;br /&gt;for _row in select * from table LOOP&lt;br /&gt;  select into _id nextval('a_sequence');&lt;br /&gt;  insert into new_table(id, ....) values (_id, .....);&lt;br /&gt; LOOP&lt;div class="blogger-post-footer"&gt;&lt;img width='1' height='1' src='https://blogger.googleusercontent.com/tracker/33306823-3261788714654673159?l=pylab.blogspot.com' alt='' /&gt;&lt;/div&gt;</content><link rel='replies' type='application/atom+xml' href='http://pylab.blogspot.com/feeds/3261788714654673159/comments/default' title='Post Comments'/><link rel='replies' type='text/html' href='http://www.blogger.com/comment.g?blogID=33306823&amp;postID=3261788714654673159' title='0 Comments'/><link rel='edit' type='application/atom+xml' href='http://www.blogger.com/feeds/33306823/posts/default/3261788714654673159'/><link rel='self' type='application/atom+xml' href='http://www.blogger.com/feeds/33306823/posts/default/3261788714654673159'/><link rel='alternate' type='text/html' href='http://pylab.blogspot.com/2007/02/autoincrement-sequence-id.html' title='autoincrement sequence id'/><author><name>NextGenSearch</name><email>noreply@blogger.com</email><gd:image rel='http://schemas.google.com/g/2005#thumbnail' width='16' height='16' src='http://img2.blogblog.com/img/b16-rounded.gif'/></author><thr:total>0</thr:total></entry><entry><id>tag:blogger.com,1999:blog-33306823.post-6056889114874808724</id><published>2007-02-14T11:02:00.000-08:00</published><updated>2007-02-14T11:03:09.906-08:00</updated><category scheme='http://www.blogger.com/atom/ns#' term='postgres'/><title type='text'>strip_phone_number</title><content type='html'>create or replace function 
strip_phone_number(&lt;br /&gt;  _in varchar&lt;br /&gt;) returns varchar as $_$&lt;br /&gt;DECLARE&lt;br /&gt; _len  integer;&lt;br /&gt; _i    integer;&lt;br /&gt; _chr  varchar;&lt;br /&gt; _test  varchar;&lt;br /&gt; _result varchar;&lt;br /&gt;BEGIN&lt;br /&gt;&lt;br /&gt;    select into _len length(_in);&lt;br /&gt;&lt;br /&gt;    _i := 0;&lt;br /&gt;    select into _result '';&lt;br /&gt;    while _i &lt;= _len  LOOP&lt;br /&gt;        _i := _i + 1;&lt;br /&gt;        select into _chr substring(_in from _i for 1);&lt;br /&gt;        select into _test substring(_chr from '[0-9]$');&lt;br /&gt;        --_test := _chr;&lt;br /&gt;        if (_test is not null) then&lt;br /&gt;            select into _result  _result || _test;&lt;br /&gt;        end if;&lt;br /&gt;        raise debug '%:%:%:%', _i, _chr, _test, _result;&lt;br /&gt;    END LOOP;&lt;br /&gt;&lt;br /&gt;    return _result;&lt;br /&gt;END;&lt;br /&gt;&lt;br /&gt;$_$ language plpgsql;&lt;br /&gt;&lt;br /&gt;(u will need to fix the tabs thing in msn pastes)&lt;br /&gt;select strip_phone_number('(123) 456-8909 x 1234');&lt;br /&gt; strip_phone_number&lt;br /&gt;--------------------&lt;br /&gt; 12345689091234&lt;br /&gt;(1 row)&lt;br /&gt;so that dumps the "not digit" characters from a string in pl/pgsql&lt;br /&gt;but what good does that do?&lt;br /&gt;dont you need the ui to undo that?&lt;div class="blogger-post-footer"&gt;&lt;img width='1' height='1' src='https://blogger.googleusercontent.com/tracker/33306823-6056889114874808724?l=pylab.blogspot.com' alt='' /&gt;&lt;/div&gt;</content><link rel='replies' type='application/atom+xml' href='http://pylab.blogspot.com/feeds/6056889114874808724/comments/default' title='Post Comments'/><link rel='replies' type='text/html' href='http://www.blogger.com/comment.g?blogID=33306823&amp;postID=6056889114874808724' title='0 Comments'/><link rel='edit' type='application/atom+xml' href='http://www.blogger.com/feeds/33306823/posts/default/6056889114874808724'/><link 
rel='self' type='application/atom+xml' href='http://www.blogger.com/feeds/33306823/posts/default/6056889114874808724'/><link rel='alternate' type='text/html' href='http://pylab.blogspot.com/2007/02/stripphonenumber.html' title='strip_phone_number'/><author><name>NextGenSearch</name><email>noreply@blogger.com</email><gd:image rel='http://schemas.google.com/g/2005#thumbnail' width='16' height='16' src='http://img2.blogblog.com/img/b16-rounded.gif'/></author><thr:total>0</thr:total></entry><entry><id>tag:blogger.com,1999:blog-33306823.post-3028914292466034730</id><published>2007-02-11T11:03:00.000-08:00</published><updated>2007-02-11T11:08:37.769-08:00</updated><title type='text'>Import TIGER database to PostGIS</title><content type='html'>wget http://www.gdal.org/dl/fwtools/FWTools-linux-1.2.0.tar.bz2&lt;br /&gt;tar xjf FWTools-linux-1.2.0.tar.bz2&lt;br /&gt;cd FWTools-1.2.0/&lt;br /&gt;./install.sh&lt;br /&gt;export LD_LIBRARY_PATH=/www/ask/work/ogr/FWTools-1.2.0/lib&lt;br /&gt;export GDAL_DATA=/www/ask/work/ogr/FWTools-1.2.0/share&lt;br /&gt;./bin/ogr2ogr -update -append -f "PostGreSQL" PG:"host=localhost user=postgres dbname=mydbname password=password" /www/ask/work/tiger/TGR06001.RT1 layer CompleteChain -nln masuf -a_srs "EPSG:4269"&lt;div class="blogger-post-footer"&gt;&lt;img width='1' height='1' src='https://blogger.googleusercontent.com/tracker/33306823-3028914292466034730?l=pylab.blogspot.com' alt='' /&gt;&lt;/div&gt;</content><link rel='replies' type='application/atom+xml' href='http://pylab.blogspot.com/feeds/3028914292466034730/comments/default' title='Post Comments'/><link rel='replies' type='text/html' href='http://www.blogger.com/comment.g?blogID=33306823&amp;postID=3028914292466034730' title='0 Comments'/><link rel='edit' type='application/atom+xml' href='http://www.blogger.com/feeds/33306823/posts/default/3028914292466034730'/><link rel='self' type='application/atom+xml' 
href='http://www.blogger.com/feeds/33306823/posts/default/3028914292466034730'/><link rel='alternate' type='text/html' href='http://pylab.blogspot.com/2007/02/import-tiger-database-to-postgis.html' title='Import TIGER database to PostGIS'/><author><name>NextGenSearch</name><email>noreply@blogger.com</email><gd:image rel='http://schemas.google.com/g/2005#thumbnail' width='16' height='16' src='http://img2.blogblog.com/img/b16-rounded.gif'/></author><thr:total>0</thr:total></entry><entry><id>tag:blogger.com,1999:blog-33306823.post-4638028759315495013</id><published>2007-02-10T20:28:00.000-08:00</published><updated>2007-02-10T19:16:34.734-08:00</updated><category scheme='http://www.blogger.com/atom/ns#' term='postgis'/><category scheme='http://www.blogger.com/atom/ns#' term='postgres'/><title type='text'>Installing postgis</title><content type='html'>yum install proj proj-devel&lt;br /&gt;&lt;br /&gt;#get postgres source&lt;br /&gt;cd contrib&lt;br /&gt;svn co http://svn.refractions.net/postgis/trunk postgis&lt;br /&gt;cd postgis&lt;br /&gt;./autogen.sh&lt;br /&gt;./configure --with-pgsql=/usr/local/pgsql/bin/pg_config&lt;br /&gt;make &amp;&amp;amp; sudo make install&lt;br /&gt;/usr/local/pgsql/bin/createlang plpgsql coupon&lt;br /&gt;/usr/local/pgsql/bin/createlang plpgsql coupon -U postgres&lt;br /&gt;/usr/local/pgsql/bin/psql -d coupon -f lwpostgis.sql -U postgres&lt;br /&gt;/usr/local/pgsql/bin/psql -d coupon -f spatial_ref_sys.sql -U postgres&lt;div class="blogger-post-footer"&gt;&lt;img width='1' height='1' src='https://blogger.googleusercontent.com/tracker/33306823-4638028759315495013?l=pylab.blogspot.com' alt='' /&gt;&lt;/div&gt;</content><link rel='replies' type='application/atom+xml' href='http://pylab.blogspot.com/feeds/4638028759315495013/comments/default' title='Post Comments'/><link rel='replies' type='text/html' href='http://www.blogger.com/comment.g?blogID=33306823&amp;postID=4638028759315495013' title='0 Comments'/><link rel='edit' 
type='application/atom+xml' href='http://www.blogger.com/feeds/33306823/posts/default/4638028759315495013'/><link rel='self' type='application/atom+xml' href='http://www.blogger.com/feeds/33306823/posts/default/4638028759315495013'/><link rel='alternate' type='text/html' href='http://pylab.blogspot.com/2007/02/installing-postgis.html' title='Installing postgis'/><author><name>NextGenSearch</name><email>noreply@blogger.com</email><gd:image rel='http://schemas.google.com/g/2005#thumbnail' width='16' height='16' src='http://img2.blogblog.com/img/b16-rounded.gif'/></author><thr:total>0</thr:total></entry><entry><id>tag:blogger.com,1999:blog-33306823.post-6329348954211740068</id><published>2007-02-10T01:51:00.000-08:00</published><updated>2007-02-10T01:47:55.766-08:00</updated><category scheme='http://www.blogger.com/atom/ns#' term='vim'/><title type='text'>Vi Commands</title><content type='html'>&lt;a onblur="try {parent.deselectBloggerImageGracefully();} catch(e) {}" href="http://www.jess2.net/doc/vicmd.png"&gt;&lt;img src="http://www.jess2.net/doc/vicmd.png" border="0" alt="" /&gt;&lt;/a&gt;&lt;br/&gt; &lt;div class="blogger-post-footer"&gt;&lt;img width='1' height='1' src='https://blogger.googleusercontent.com/tracker/33306823-6329348954211740068?l=pylab.blogspot.com' alt='' /&gt;&lt;/div&gt;</content><link rel='replies' type='application/atom+xml' href='http://pylab.blogspot.com/feeds/6329348954211740068/comments/default' title='Post Comments'/><link rel='replies' type='text/html' href='http://www.blogger.com/comment.g?blogID=33306823&amp;postID=6329348954211740068' title='0 Comments'/><link rel='edit' type='application/atom+xml' href='http://www.blogger.com/feeds/33306823/posts/default/6329348954211740068'/><link rel='self' type='application/atom+xml' href='http://www.blogger.com/feeds/33306823/posts/default/6329348954211740068'/><link rel='alternate' type='text/html' href='http://pylab.blogspot.com/2007/02/vi-commands.html' title='Vi 
Commands'/><author><name>NextGenSearch</name><email>noreply@blogger.com</email><gd:image rel='http://schemas.google.com/g/2005#thumbnail' width='16' height='16' src='http://img2.blogblog.com/img/b16-rounded.gif'/></author><thr:total>0</thr:total></entry><entry><id>tag:blogger.com,1999:blog-33306823.post-675288142890427843</id><published>2007-02-10T01:43:00.000-08:00</published><updated>2007-02-10T01:20:09.646-08:00</updated><category scheme='http://www.blogger.com/atom/ns#' term='coroutines'/><title type='text'>Why multithreaded design was avoided</title><content type='html'>Multithreaded environments can be a headache. Experienced programmers know that and try to avoid threads, while on the other hand inexperienced programmers find them quite attractive and usually make applications a mess. It all boils down to synchronization. Synchronization of threads can be very hard to get right and is wet ground for a great number of bugs to grow. Add to that, that race conditions and thread-related bugs can be extremely hard to hunt down, since the conditions to reproduce them may be unknown. The efficiency of threads is also a concern. The scripting engine for a game must be fast. The game world contains many actors that need to be updated at least every frame. You don’t want a scheduler to take up half of your CPU trying to decide which - of many, many actors - to run next. 
Also, if you have to spawn and delete bullet actors in the game (coming from a fast machine gun), you should start looking for thread pools and other techniques since spawning each bullet thread can take too long.&lt;br/&gt;&lt;br/&gt;To sum it up: below is the list of reasons that multithreaded environments were overlooked by game developers :&lt;br/&gt;&lt;br/&gt;    * Scheduling overhead&lt;br/&gt;    * Memory cost per thread&lt;br/&gt;    * Inefficient thread creation&lt;br/&gt;    * Synchronization problems&lt;br/&gt;    * More bug prone&lt;br/&gt;    * Difficult to debug&lt;br/&gt;&lt;br/&gt;&lt;br/&gt;&lt;div class="blogger-post-footer"&gt;&lt;img width='1' height='1' src='https://blogger.googleusercontent.com/tracker/33306823-675288142890427843?l=pylab.blogspot.com' alt='' /&gt;&lt;/div&gt;</content><link rel='replies' type='application/atom+xml' href='http://pylab.blogspot.com/feeds/675288142890427843/comments/default' title='Post Comments'/><link rel='replies' type='text/html' href='http://www.blogger.com/comment.g?blogID=33306823&amp;postID=675288142890427843' title='0 Comments'/><link rel='edit' type='application/atom+xml' href='http://www.blogger.com/feeds/33306823/posts/default/675288142890427843'/><link rel='self' type='application/atom+xml' href='http://www.blogger.com/feeds/33306823/posts/default/675288142890427843'/><link rel='alternate' type='text/html' href='http://pylab.blogspot.com/2007/02/why-multithreaded-design-was-avoided.html' title='Why multithreaded design was avoided'/><author><name>NextGenSearch</name><email>noreply@blogger.com</email><gd:image rel='http://schemas.google.com/g/2005#thumbnail' width='16' height='16' src='http://img2.blogblog.com/img/b16-rounded.gif'/></author><thr:total>0</thr:total></entry><entry><id>tag:blogger.com,1999:blog-33306823.post-2164526035482160331</id><published>2007-02-10T01:19:00.000-08:00</published><updated>2007-02-10T00:45:03.097-08:00</updated><category scheme='http://www.blogger.com/atom/ns#' 
term='coroutines'/><title type='text'>The main feature that makes Stackless Tasklets</title><content type='html'>The main feature that makes Stackless so attractive for use as a scripting language is the support for tasklets. Tasklets make it possible to create “micro-threads”, allowing the programmer to switch among several execution threads that only exist in the python environment and have no dependencies on the underlying OS threads. Some would call these threads “green-threads”. These threads have a very small footprint on memory and CPU. You can actually create hundreds of threads with almost no overhead. Every tasklet has only a few bytes of memory overhead. And the scheduling of threads takes O(1) time with a simple Round-Robin scheduling algorithm. If we were talking about native threads we would have almost 1MB of memory per thread and high cost scheduling to do things we don’t need. To all that add that the engine would behave very differently on different operating systems. Even on different versions of the same operating system.&lt;div class="blogger-post-footer"&gt;&lt;img width='1' height='1' src='https://blogger.googleusercontent.com/tracker/33306823-2164526035482160331?l=pylab.blogspot.com' alt='' /&gt;&lt;/div&gt;</content><link rel='replies' type='application/atom+xml' href='http://pylab.blogspot.com/feeds/2164526035482160331/comments/default' title='Post Comments'/><link rel='replies' type='text/html' href='http://www.blogger.com/comment.g?blogID=33306823&amp;postID=2164526035482160331' title='0 Comments'/><link rel='edit' type='application/atom+xml' href='http://www.blogger.com/feeds/33306823/posts/default/2164526035482160331'/><link rel='self' type='application/atom+xml' href='http://www.blogger.com/feeds/33306823/posts/default/2164526035482160331'/><link rel='alternate' type='text/html' href='http://pylab.blogspot.com/2007/02/main-feature-that-makes-stackless.html' title='The main feature that makes Stackless 
Tasklets'/><author><name>NextGenSearch</name><email>noreply@blogger.com</email><gd:image rel='http://schemas.google.com/g/2005#thumbnail' width='16' height='16' src='http://img2.blogblog.com/img/b16-rounded.gif'/></author><thr:total>0</thr:total></entry><entry><id>tag:blogger.com,1999:blog-33306823.post-2870778616206781470</id><published>2007-02-10T00:44:00.000-08:00</published><updated>2007-02-09T17:15:37.939-08:00</updated><category scheme='http://www.blogger.com/atom/ns#' term='coroutines'/><title type='text'>Coroutines vs generators</title><content type='html'>Coroutines have a completely separate stack which is saved when they yield,&lt;br /&gt;so you have a load of nested function calls and yield from deep in the&lt;br /&gt;middle of them.&lt;br /&gt;&lt;br /&gt;Generators save only a single stack frame, so all yields must come directly&lt;br /&gt;from the generator, not from functions which it calls.&lt;br /&gt;&lt;br /&gt;You can use generators to get a similar effect to coroutines by nesting&lt;br /&gt;generators and propagating the yields back up the chain, but this has to be&lt;br /&gt;done explicitly at every level.&lt;br /&gt;&lt;br /&gt;--&lt;br /&gt;Duncan Booth duncan@rcp.co.uk&lt;div class="blogger-post-footer"&gt;&lt;img width='1' height='1' src='https://blogger.googleusercontent.com/tracker/33306823-2870778616206781470?l=pylab.blogspot.com' alt='' /&gt;&lt;/div&gt;</content><link rel='replies' type='application/atom+xml' href='http://pylab.blogspot.com/feeds/2870778616206781470/comments/default' title='Post Comments'/><link rel='replies' type='text/html' href='http://www.blogger.com/comment.g?blogID=33306823&amp;postID=2870778616206781470' title='0 Comments'/><link rel='edit' type='application/atom+xml' href='http://www.blogger.com/feeds/33306823/posts/default/2870778616206781470'/><link rel='self' type='application/atom+xml' href='http://www.blogger.com/feeds/33306823/posts/default/2870778616206781470'/><link rel='alternate' type='text/html' 
href='http://pylab.blogspot.com/2007/02/coroutines-vs-generators.html' title='Coroutines vs generators'/><author><name>NextGenSearch</name><email>noreply@blogger.com</email><gd:image rel='http://schemas.google.com/g/2005#thumbnail' width='16' height='16' src='http://img2.blogblog.com/img/b16-rounded.gif'/></author><thr:total>0</thr:total></entry><entry><id>tag:blogger.com,1999:blog-33306823.post-907653232097024408</id><published>2007-02-09T17:14:00.000-08:00</published><updated>2007-02-05T12:59:06.324-08:00</updated><category scheme='http://www.blogger.com/atom/ns#' term='flash ffmpeg linux video'/><title type='text'>Posting Flash Videos with FFmpeg and FlowPlayer</title><content type='html'>Posting Flash Videos with FFmpeg and FlowPlayer&lt;br /&gt;&lt;br /&gt;Anna showing on FlowPlayer Last night I have posted my very first flash video on the web — and it was Anna sitting there watching, her own video for 2 minutes (which probably would only interest the parents and grand-parents). Anna’s video aside, I was also having fun figuring out getting that video online.&lt;br /&gt;&lt;br /&gt;There are many ways putting videos online. You can either:&lt;br /&gt;&lt;br /&gt;   1. Upload your AVI/QuickTime/WMV files onto a folder somewhere inside your hosting account.&lt;br /&gt;   2. Use a third party video hosting service like Google Video or YouTube.&lt;br /&gt;&lt;br /&gt;Personally I don’t like (2). You need to upload your videos to that 3rd party, and you have little control over how the final outcome will be encoded (bit rate, frame rate, quality, etc). Moreover, there are terms and conditions that you need to read through, let along agreeing to. At the end, who owns the rights to uploaded video?&lt;br /&gt;&lt;br /&gt;Being a control freak (well, only over the systems that I need to manage), I have always preferred option (1) by hosting video files inside my own accounts, which has some crazy amount of space and data transfer anyway. 
Except you don’t get that nice Flash applet which you can embed into your own pages, so visitors can &lt;click&gt; and watch the video without leaving the page. They don’t need to worry about saving onto the desktop, which media player to use, whether codec has been installed, etc. They Just WorksTM — perfect for the grand-parents :)&lt;br /&gt;&lt;br /&gt;With a bit of time wasted on research and mocking around, it turns out that you can easily achieve the effect of embedded flash video, and yet host the video files on your own server. And there’s zero penny you need to spend — all can be done via these open source software, FFmpeg and FlowPlayer.&lt;br /&gt;The Basis&lt;br /&gt;&lt;br /&gt;Here’s a summary of what needs to be done.&lt;br /&gt;&lt;br /&gt;   1. Convert the video file into a suitable format for Flash players.&lt;br /&gt;   2. Upload the converted file onto hosted account.&lt;br /&gt;   3. Upload the Flash player if hasn’t been done.&lt;br /&gt;   4. Paste HTML code snippet into the web page.&lt;br /&gt;&lt;br /&gt;Flash players can only play video files encoded into the FLV (Flash Video) format, which is also the format used by Google Video and YouTube. To do so the open source way is use the universal encoder, FFmpeg.&lt;br /&gt;FFmpeg&lt;br /&gt;&lt;br /&gt;Installing FFmpeg is trivial — at least on my Gentoo boxes :) Make sure appropriate USE flags are used during emerge. For example I have:&lt;br /&gt;&lt;br /&gt;  USE="aac amr encode ogg vorbis x264 xvid zlib" emerge ffmpeg&lt;br /&gt;&lt;br /&gt;Other Linux distribution? Not using Linux? Err. Good luck.&lt;br /&gt;&lt;br /&gt;To convert a movie using FFmpeg, do the following:&lt;br /&gt;&lt;br /&gt;$ ffmpeg -i movie.avi movie.flv&lt;br /&gt;&lt;br /&gt;It will then convert the AVI file into FLV Flash Video. 
FFmpeg can also handle many different container types, for example QuickTime, WMV1 (not WMV3 at the moment), MPEG4, etc, so just throw the video at it and see whether it handles it.&lt;br /&gt;&lt;br /&gt;There are many command line options that you can use to alter the encoding behaviour. For example if I wish to rescale the movie to 320×240, with 15 frame/sec, at video at 250kbps and audio down-sampling to 22,050Hz at 48kbps, I just tell FFmpeg to do it on the command line:&lt;br /&gt;&lt;br /&gt;$ ffmpeg -i movie.avi -s 320x240 -r 15 -b 250 -ar 22050 -ab 48 movie.flv&lt;br /&gt;&lt;br /&gt;There are many more options so do check out their manual if you are interested.&lt;br /&gt;&lt;br /&gt;There is another thing that we need to do — create a JPEG thumbnail for previewing. This will be displayed in the otherwise empty canvas of the flash player, before [Play] is pressed. For convenience sake, we’ll take the very first frame of the video.&lt;br /&gt;&lt;br /&gt;$ ffmpeg -i movie.avi -f mjpeg -t 0.001 movie.jpg&lt;br /&gt;&lt;br /&gt;FLVTool2&lt;br /&gt;&lt;br /&gt;FLVTool2 is needed to calculate and update meta data in the FLV file. Well, you don’t really need it as you can already play the FLV file spill out from FFmpeg, but because of the missing info, Flash player cannot show the buffering status and current playing position, etc.&lt;br /&gt;&lt;br /&gt;I was hesitated to install FLVTool2 because (1) it depends on Ruby which I need to emerge (2) it does not have an ebuild for it. But anyway, having it running is still trivial.&lt;br /&gt;&lt;br /&gt;   1. Make sure you already have Ruby installed.&lt;br /&gt;   2. Download the latest FLVTool2&lt;br /&gt;   3. Unpack the tarball, change into its directory, and run ruby setup.rb all as root.&lt;br /&gt;&lt;br /&gt;Now just run&lt;br /&gt;&lt;br /&gt;$ flvtool2 -U movie.flv&lt;br /&gt;&lt;br /&gt;Well, installation is actually optional. 
You can pretty much run FLVTool2 from inside its unpacked directory, for example.&lt;br /&gt;&lt;br /&gt;$ RUBYLIB=lib ruby bin/flvtool2 -U &lt;path to&gt;/movie.flv&lt;br /&gt;&lt;br /&gt;Your FLV is ready to go! Upload both FLV and generated JPEG thumbnail onto your web hosting account. Make sure they are in the same folder.&lt;br /&gt;FlowPlayer&lt;br /&gt;&lt;br /&gt;FlowPlayer is an open source Flash video player that is light-weight (at around 22kb), and pretty easy to configure. Download the latest version from SourceForge.&lt;br /&gt;&lt;br /&gt;Unpack the ZIP will give you the player file FlowPlayer.swf. Upload it somewhere on your website.&lt;br /&gt;&lt;br /&gt;Now you need to cut and paste this HTML code snippet onto the web page you wish to show the video:&lt;br /&gt;&lt;br /&gt;&lt;object type="application/x-shockwave-flash" data="[your site]/FlowPlayer.swf" width="320" height="263" id="FlowPlayer"&gt;&lt;br /&gt;  &lt;param name="allowScriptAccess" value="sameDomain"/&gt;&lt;br /&gt;  &lt;param name="movie" value="[your site]/FlowPlayer.swf"/&gt;&lt;br /&gt;  &lt;param name="quality" value="high"/&gt;&lt;br /&gt;  &lt;param name="scale" value="noScale"/&gt;&lt;br /&gt;  &lt;param name="wmode" value="transparent"/&gt;&lt;br /&gt;  &lt;param name="flashvars" value="baseURL=[base URL]&amp;amp;videoFile=movie.flv&lt;br /&gt;    &amp;amp;autoPlay=false&amp;amp;loop=false&amp;amp;autoBuffering=false&lt;br /&gt;    &amp;amp;splashImageFile=movie.jpg"/&gt;&lt;br /&gt;&lt;/object&gt;&lt;br /&gt;&lt;br /&gt;[your site] is the URL to where you keep the FlowPlayer.swf. [base URL] is the directory where you keep the FLV and JPEG files. 
For example, the final URL to FLV file will be [base URL]/movie.flv.&lt;br /&gt;&lt;br /&gt;Paste that onto your website, or into your blog post, and check whether it works!&lt;br /&gt;&lt;br /&gt;Please check FlowPlayer documentation on the options going to flashvars.&lt;br /&gt;Conclusion&lt;br /&gt;&lt;br /&gt;In fact those steps can be easily automated with a bit of scripting. I shall be posting more movies on Anna’s website.&lt;br /&gt;&lt;br /&gt;If your hosting companies are not very generous quota (i.e. small timers who can’t really oversell), or if you think your video will get digged and slashdotted and become overnight hit, then maybe having Google Video or YouTube to host for you is a wiser idea, just in case a huge hosting bill landing on your credit card statement.&lt;br /&gt;&lt;br /&gt;Otherwise, you might choose to host those videos on your own account, and regain a bit of control.&lt;div class="blogger-post-footer"&gt;&lt;img width='1' height='1' src='https://blogger.googleusercontent.com/tracker/33306823-907653232097024408?l=pylab.blogspot.com' alt='' /&gt;&lt;/div&gt;</content><link rel='replies' type='application/atom+xml' href='http://pylab.blogspot.com/feeds/907653232097024408/comments/default' title='Post Comments'/><link rel='replies' type='text/html' href='http://www.blogger.com/comment.g?blogID=33306823&amp;postID=907653232097024408' title='0 Comments'/><link rel='edit' type='application/atom+xml' href='http://www.blogger.com/feeds/33306823/posts/default/907653232097024408'/><link rel='self' type='application/atom+xml' href='http://www.blogger.com/feeds/33306823/posts/default/907653232097024408'/><link rel='alternate' type='text/html' href='http://pylab.blogspot.com/2007/02/posting-flash-videos-with-ffmpeg-and.html' title='Posting Flash Videos with FFmpeg and FlowPlayer'/><author><name>NextGenSearch</name><email>noreply@blogger.com</email><gd:image rel='http://schemas.google.com/g/2005#thumbnail' width='16' height='16' 
src='http://img2.blogblog.com/img/b16-rounded.gif'/></author><thr:total>0</thr:total></entry><entry><id>tag:blogger.com,1999:blog-33306823.post-3227015213941318751</id><published>2007-02-05T12:58:00.000-08:00</published><updated>2007-02-05T12:59:06.620-08:00</updated><category scheme='http://www.blogger.com/atom/ns#' term='scaling'/><title type='text'>scaling rails</title><content type='html'>I've said it before, but it bears repeating: There's nothing interesting about how Ruby on Rails scales. We've gone the easy route and merely followed what makes Yahoo!, LiveJournal, and other high-profile LAMP stacks scale high and mighty.&lt;br /&gt;&lt;br /&gt;Take state out of the application servers and push it to database/memcached/shared network drive (that's the whole Shared Nothing thang). Use load balancers between your tiers, so you have load balancers -&gt; web servers -&gt; load balancers -&gt; app servers -&gt; load balancers -&gt; database/memcached/shared network drive servers. (Past the entry point, load balancers can just be software, like haproxy).&lt;br /&gt;&lt;br /&gt;In a setup like that, you can add almost any number of web and app servers without changing a thing.&lt;br /&gt;&lt;br /&gt;Scaling the database is the "hard part", but still a solved problem. Once you get beyond what can be easily managed by a decent master-slave setup (and that'll probably take millions and millions of pageviews per day), you start doing partitioning.&lt;br /&gt;&lt;br /&gt;Users 1-100K on cluster A, 100K-200K on cluster B, and so on. But again, this is nothing new. LiveJournal scales like that. I hear eBay too. And probably everyone else that has to deal with huge numbers.&lt;br /&gt;&lt;br /&gt;So the scaling part is solved. What's left is judging whether the economics of it are sensible to you. 
And that's really a performance issue, not a scalability one.&lt;br /&gt;&lt;br /&gt;If your app server costs $500 per month (like our dual xeons does) and can drive 30 requests/second on Rails and 60 requests/second on Java/PHP/.NET/whatever (these are totally arbitrary numbers pulled out of my...), then you're faced with the cost of $500 for 2.6 million requests/day on the Rails setup and $250 for the same on the other one.&lt;br /&gt;&lt;br /&gt;Now. How much is productivity worth to you? Let's just take a $60K/year programmer. That's $5K/month. If you need to handle 5 million requests/day, your programmer needs to be 10% more productive on Rails to make it even. If he's 15% more productive, you're up $250. And this is not even considering the joy and happiness programmers derive from working with more productive tools (nor that people have claimed to be many times more productive).&lt;br /&gt;&lt;br /&gt;Of course, the silly math above hinges on the assumption that the whatever stack is twice as fast as Rails. That’s a very big if. And totally dependent on the application, the people, and so on. Some have found Rails to be as fast or faster than comparable “best-of-breed J2EE stacks” — see http://weblog.rubyonrails.com/archives/2005/04/04/justingehtland-is-back-with-numbers-to-back-it-up/&lt;br /&gt;&lt;br /&gt;The point is that the cost per request is plummeting, but the cost of programming is not. Thus, we have to find ways to trade efficiency in the runtime for efficiency in the “thought time” in order to make the development of applications cheaper. 
I believed we’ve long since entered an age where simplicity of development and maintenance is where the real value lies.&lt;br /&gt;David Heinemeier Hansson&lt;br /&gt;Tuesday, July 12, 2005&lt;div class="blogger-post-footer"&gt;&lt;img width='1' height='1' src='https://blogger.googleusercontent.com/tracker/33306823-3227015213941318751?l=pylab.blogspot.com' alt='' /&gt;&lt;/div&gt;</content><link rel='replies' type='application/atom+xml' href='http://pylab.blogspot.com/feeds/3227015213941318751/comments/default' title='Post Comments'/><link rel='replies' type='text/html' href='http://www.blogger.com/comment.g?blogID=33306823&amp;postID=3227015213941318751' title='0 Comments'/><link rel='edit' type='application/atom+xml' href='http://www.blogger.com/feeds/33306823/posts/default/3227015213941318751'/><link rel='self' type='application/atom+xml' href='http://www.blogger.com/feeds/33306823/posts/default/3227015213941318751'/><link rel='alternate' type='text/html' href='http://pylab.blogspot.com/2007/02/scaling-rails.html' title='scaling rails'/><author><name>NextGenSearch</name><email>noreply@blogger.com</email><gd:image rel='http://schemas.google.com/g/2005#thumbnail' width='16' height='16' src='http://img2.blogblog.com/img/b16-rounded.gif'/></author><thr:total>0</thr:total></entry><entry><id>tag:blogger.com,1999:blog-33306823.post-4712534613059913919</id><published>2007-02-05T00:43:00.000-08:00</published><updated>2007-02-05T00:44:18.407-08:00</updated><category scheme='http://www.blogger.com/atom/ns#' term='postgres'/><title type='text'>PSQL</title><content type='html'>psql is so good&lt;br /&gt;hey&lt;br /&gt;in psql&lt;br /&gt;there is a \h&lt;br /&gt;which gives you sql query help,&lt;br /&gt;so \h select&lt;br /&gt;tehn as you are typing things out, hitting tab gives you options for what to use next sometimes&lt;br /&gt;try typing&lt;br /&gt;alter table [tab]&lt;br /&gt;then it displays list of tables it can see.&lt;br /&gt;then there is \? 
which lists other meta commands&lt;br /&gt;like \dt shows tables&lt;br /&gt;\l lists databases \&lt;br /&gt;\dn lists schemas&lt;br /&gt;\du  lists users&lt;br /&gt;well, for your setup mostly you have 1 users, one schema, and couple databases&lt;br /&gt;when you are connected to one database in psql, \c newdbname&lt;br /&gt;chages databases&lt;br /&gt;after a few days you get used to the auto-complete features, and the \h things to help you, it feels like a gui sort of, but much much faster than pgadmin&lt;br /&gt;though, i got into postgresql after being forced to work with oracle (yuk)&lt;br /&gt;the only oracle gui at the time was toad, which is by Que$t $oftware.&lt;br /&gt;and we never could afford to buy it&lt;br /&gt;so we learned the oracle meta database,&lt;br /&gt;which is oddly enough, tables and a database, to describe the database&lt;br /&gt;postgresql has this too&lt;br /&gt;the pg_catalog, where there are what looks like tables, to describe our user databases, schemas, tables, and our database objects.&lt;br /&gt;and the \d commands in psql sort of are short cuts for this&lt;br /&gt;but you can also do select .. from pg_* tables, and that gives us information on table features, and the columns, which is what pgadmin is doing behind the scenes for us to display their things all nicely formatted.&lt;br /&gt;mysql, on the otherhand, does not have a meta database, but then what do you expect from junk :)&lt;br /&gt;in version 3.3 the "show databases" command actually invoked a system command to "ls" (list files) in the mysql directory, since mysql used to (still does?) 
create databases as directories on the file system.&lt;br /&gt;this has the horrible side effect of making table names Case Sensitive, which violates the SQL standard (lol, and mysql claims to be sql compliant, but can't even get case insensitive table names :!)&lt;br /&gt;we discovered that one time the hard way by migrating a mysql on windows app to mysql on unix, and of course developers will make code in different spots like &lt;br /&gt;select * from MyTable&lt;br /&gt;select * from mytable&lt;br /&gt;select * from MYTABLE&lt;br /&gt;etc&lt;div class="blogger-post-footer"&gt;&lt;img width='1' height='1' src='https://blogger.googleusercontent.com/tracker/33306823-4712534613059913919?l=pylab.blogspot.com' alt='' /&gt;&lt;/div&gt;</content><link rel='replies' type='application/atom+xml' href='http://pylab.blogspot.com/feeds/4712534613059913919/comments/default' title='Post Comments'/><link rel='replies' type='text/html' href='http://www.blogger.com/comment.g?blogID=33306823&amp;postID=4712534613059913919' title='0 Comments'/><link rel='edit' type='application/atom+xml' href='http://www.blogger.com/feeds/33306823/posts/default/4712534613059913919'/><link rel='self' type='application/atom+xml' href='http://www.blogger.com/feeds/33306823/posts/default/4712534613059913919'/><link rel='alternate' type='text/html' href='http://pylab.blogspot.com/2007/02/psql.html' title='PSQL'/><author><name>NextGenSearch</name><email>noreply@blogger.com</email><gd:image rel='http://schemas.google.com/g/2005#thumbnail' width='16' height='16' src='http://img2.blogblog.com/img/b16-rounded.gif'/></author><thr:total>0</thr:total></entry><entry><id>tag:blogger.com,1999:blog-33306823.post-2339698181857783439</id><published>2007-02-04T16:55:00.000-08:00</published><updated>2007-02-04T16:56:12.308-08:00</updated><category scheme='http://www.blogger.com/atom/ns#' term='webserver'/><title type='text'>python handy debug  error tip</title><content type='html'>&lt;pre&gt;&lt;tt&gt;&lt;tt&gt;Here's a 
handy way to make debugging your web.py scripts a little&lt;br /&gt;easier. Just add this to your main script (before you do web.run()):&lt;br /&gt;&lt;br /&gt;def error():&lt;br /&gt; if web.webapi.ctx.ip == '&lt;your&gt;': web.debugerror()&lt;br /&gt; else: origerror()&lt;br /&gt;origerror = web.webapi.internalerror&lt;br /&gt;web.webapi.internalerror = error&lt;br /&gt;&lt;br /&gt;Add your IP address where it says. This will show detailed debug&lt;br /&gt;output if an exception occurs, but only when the request is from your&lt;br /&gt;IP address. Anyone else will get the usual "internal server error"&lt;br /&gt;message. This is a convenient way to make your web.py app securely&lt;br /&gt;debuggable without having to manually switch back and forth between&lt;br /&gt;debug/deploy modes every time you want to make a change.&lt;/tt&gt;&lt;/tt&gt;&lt;/pre&gt;&lt;div class="blogger-post-footer"&gt;&lt;img width='1' height='1' src='https://blogger.googleusercontent.com/tracker/33306823-2339698181857783439?l=pylab.blogspot.com' alt='' /&gt;&lt;/div&gt;</content><link rel='replies' type='application/atom+xml' href='http://pylab.blogspot.com/feeds/2339698181857783439/comments/default' title='Post Comments'/><link rel='replies' type='text/html' href='http://www.blogger.com/comment.g?blogID=33306823&amp;postID=2339698181857783439' title='0 Comments'/><link rel='edit' type='application/atom+xml' href='http://www.blogger.com/feeds/33306823/posts/default/2339698181857783439'/><link rel='self' type='application/atom+xml' href='http://www.blogger.com/feeds/33306823/posts/default/2339698181857783439'/><link rel='alternate' type='text/html' href='http://pylab.blogspot.com/2007/02/python-handy-debug-error-tip.html' title='python handy debug  error tip'/><author><name>NextGenSearch</name><email>noreply@blogger.com</email><gd:image rel='http://schemas.google.com/g/2005#thumbnail' width='16' height='16' 
src='http://img2.blogblog.com/img/b16-rounded.gif'/></author><thr:total>0</thr:total></entry><entry><id>tag:blogger.com,1999:blog-33306823.post-1136270360661102884</id><published>2007-02-04T14:46:00.000-08:00</published><updated>2007-02-04T15:03:09.893-08:00</updated><category scheme='http://www.blogger.com/atom/ns#' term='cheetah'/><title type='text'>Cheetah base templates</title><content type='html'>&lt;pre&gt;You can define a base class:&lt;br /&gt;&lt;br /&gt;class base:&lt;br /&gt;   def __init__(self):&lt;br /&gt;       web._compiletemplate('default.html', base='base')&lt;br /&gt;&lt;br /&gt;Use it:&lt;br /&gt;&lt;br /&gt;class page1(base):&lt;br /&gt;   def GET(self):&lt;br /&gt;       web.render('page1.html')&lt;br /&gt;&lt;br /&gt;&lt;br /&gt;Your templates:&lt;br /&gt;&lt;br /&gt;#DEFAULT.HTML&lt;br /&gt;&lt; html &gt;&lt;br /&gt;&lt;!-- skip header --&gt;&lt;br /&gt;&lt; body &gt;&lt;br /&gt;#block content&lt;br /&gt;CONTENT&lt;br /&gt;#end block&lt;br /&gt;&lt; / body &gt;&lt;br /&gt;&lt; / html &gt;&lt;br /&gt;&lt;br /&gt;#PAGE1.HTML&lt;br /&gt;#extends base&lt;br /&gt;&lt;br /&gt;#def content&lt;br /&gt;&lt;h2&gt;PAGE 1&lt;/h2&gt;&lt;br /&gt;You're on the 1st page.&lt;br /&gt;#end def&lt;br /&gt;&lt;br /&gt;#PAGE2.HTML&lt;br /&gt;#extends base&lt;br /&gt;&lt;br /&gt;#def content&lt;br /&gt;&lt;h2&gt;PAGE 2&lt;/h2&gt;&lt;br /&gt;You're on the 2nd page.&lt;br /&gt;#end def&lt;br /&gt;&lt;br /&gt;&lt;/pre&gt;&lt;div class="blogger-post-footer"&gt;&lt;img width='1' height='1' src='https://blogger.googleusercontent.com/tracker/33306823-1136270360661102884?l=pylab.blogspot.com' alt='' /&gt;&lt;/div&gt;</content><link rel='replies' type='application/atom+xml' href='http://pylab.blogspot.com/feeds/1136270360661102884/comments/default' title='Post Comments'/><link rel='replies' type='text/html' href='http://www.blogger.com/comment.g?blogID=33306823&amp;postID=1136270360661102884' title='0 Comments'/><link rel='edit' type='application/atom+xml' 
href='http://www.blogger.com/feeds/33306823/posts/default/1136270360661102884'/><link rel='self' type='application/atom+xml' href='http://www.blogger.com/feeds/33306823/posts/default/1136270360661102884'/><link rel='alternate' type='text/html' href='http://pylab.blogspot.com/2007/02/cheetah-base-templates.html' title='Cheetah base templates'/><author><name>NextGenSearch</name><email>noreply@blogger.com</email><gd:image rel='http://schemas.google.com/g/2005#thumbnail' width='16' height='16' src='http://img2.blogblog.com/img/b16-rounded.gif'/></author><thr:total>0</thr:total></entry><entry><id>tag:blogger.com,1999:blog-33306823.post-4088249351173828875</id><published>2007-02-03T22:37:00.000-08:00</published><updated>2007-02-03T22:39:04.049-08:00</updated><category scheme='http://www.blogger.com/atom/ns#' term='admin'/><title type='text'>Ssh keys</title><content type='html'>you need to edit /etc/ssh/sshd_config and disable password auth&lt;br /&gt;&lt;br /&gt;but before that, you need to make sure you have the ssh keys set up, and the authorized_keys entry, and the directory permissions.&lt;br /&gt;&lt;br /&gt;i usually just&lt;br /&gt;ssh-keygen -t dsa -b 1024&lt;br /&gt;that creates&lt;br /&gt;$HOME/.ssh/id_dsa&lt;br /&gt;$HOME/.ssh/id_dsa.pub&lt;br /&gt;then cat $HOME/.ssh/id_dsa.pub &gt;&gt; $HOME/.ssh/authorized_keys&lt;br /&gt;then chmod -R 700 $HOME/.ssh&lt;br /&gt;then edit /etc/ssh/sshd_config to set password auth = no,&lt;br /&gt;or something.&lt;br /&gt;&lt;br /&gt;but the quickest defencse is to edit the sshd startup script and add&lt;br /&gt;-p 1234&lt;br /&gt;or some other not commonly thought of port&lt;br /&gt;&lt;br /&gt;so then it can work as it is, but you just&lt;br /&gt;&lt;span style="font-weight: bold;"&gt;ssh -p 1234 yourbox.&lt;/span&gt;&lt;br /&gt;when ssh keys are working properly, you should be able to ssh without entering a password.&lt;br /&gt;&lt;br /&gt;so your system would have the private, public keys, and the remote web server only 
needs to have the entry of the id_dsa.pub  appended to authorized_keys&lt;div class="blogger-post-footer"&gt;&lt;img width='1' height='1' src='https://blogger.googleusercontent.com/tracker/33306823-4088249351173828875?l=pylab.blogspot.com' alt='' /&gt;&lt;/div&gt;</content><link rel='replies' type='application/atom+xml' href='http://pylab.blogspot.com/feeds/4088249351173828875/comments/default' title='Post Comments'/><link rel='replies' type='text/html' href='http://www.blogger.com/comment.g?blogID=33306823&amp;postID=4088249351173828875' title='0 Comments'/><link rel='edit' type='application/atom+xml' href='http://www.blogger.com/feeds/33306823/posts/default/4088249351173828875'/><link rel='self' type='application/atom+xml' href='http://www.blogger.com/feeds/33306823/posts/default/4088249351173828875'/><link rel='alternate' type='text/html' href='http://pylab.blogspot.com/2007/02/ssh-keys.html' title='Ssh keys'/><author><name>NextGenSearch</name><email>noreply@blogger.com</email><gd:image rel='http://schemas.google.com/g/2005#thumbnail' width='16' height='16' src='http://img2.blogblog.com/img/b16-rounded.gif'/></author><thr:total>0</thr:total></entry><entry><id>tag:blogger.com,1999:blog-33306823.post-6037236412411749097</id><published>2007-02-03T00:26:00.000-08:00</published><updated>2007-02-03T00:27:04.946-08:00</updated><category scheme='http://www.blogger.com/atom/ns#' term='postgres'/><title type='text'>Do not forget to check for NULL [NULL + int in postgres results in NULL]</title><content type='html'>-- Function: update_total_votes_pictures()&lt;br /&gt;&lt;br /&gt;-- DROP FUNCTION update_total_votes_pictures();&lt;br /&gt;&lt;br /&gt;CREATE OR REPLACE FUNCTION update_total_votes_pictures()&lt;br /&gt;  RETURNS int4 AS&lt;br /&gt;$BODY$   -- returns the number of pixpair entries that were created&lt;br /&gt;DECLARE&lt;br /&gt;    _count          integer;&lt;br /&gt;    all             RECORD;&lt;br /&gt;    _total_a          integer;&lt;br /&gt;    _total_b  
        integer;&lt;br /&gt;    _total          integer;&lt;br /&gt;BEGIN&lt;br /&gt;&lt;br /&gt;    _count := 0;&lt;br /&gt;&lt;br /&gt;    FOR all in&lt;br /&gt;        select id from pictures&lt;br /&gt;    LOOP&lt;br /&gt;        select into _total_a sum(pic1_votes) from pixpair where pic1_id = all.id;&lt;br /&gt;    if ( _total_a is NULL ) then&lt;br /&gt;    _total_a := 0;&lt;br /&gt;    end if;&lt;br /&gt;    select into _total_b sum(pic2_votes) from pixpair where pic2_id = all.id;&lt;br /&gt;    &lt;span style="font-weight: bold;"&gt;    if ( _total_b is NULL ) then&lt;/span&gt;&lt;br /&gt;&lt;span style="font-weight: bold;"&gt;    _total_b := 0;&lt;/span&gt;&lt;br /&gt;&lt;span style="font-weight: bold;"&gt;    end if; &lt;/span&gt;&lt;br /&gt;        _total := _total_a + _total_b;&lt;br /&gt;        update pictures set total_votes = _total where id = all.id;&lt;br /&gt;   &lt;br /&gt;        _count := _count + 1;&lt;br /&gt;    END LOOP;&lt;br /&gt;&lt;br /&gt;    return _count;&lt;br /&gt;END;&lt;br /&gt;$BODY$&lt;br /&gt;  LANGUAGE 'plpgsql' VOLATILE;&lt;br /&gt;ALTER FUNCTION update_total_votes_pictures() OWNER TO postgres;&lt;div class="blogger-post-footer"&gt;&lt;img width='1' height='1' src='https://blogger.googleusercontent.com/tracker/33306823-6037236412411749097?l=pylab.blogspot.com' alt='' /&gt;&lt;/div&gt;</content><link rel='replies' type='application/atom+xml' href='http://pylab.blogspot.com/feeds/6037236412411749097/comments/default' title='Post Comments'/><link rel='replies' type='text/html' href='http://www.blogger.com/comment.g?blogID=33306823&amp;postID=6037236412411749097' title='0 Comments'/><link rel='edit' type='application/atom+xml' href='http://www.blogger.com/feeds/33306823/posts/default/6037236412411749097'/><link rel='self' type='application/atom+xml' href='http://www.blogger.com/feeds/33306823/posts/default/6037236412411749097'/><link rel='alternate' type='text/html' 
href='http://pylab.blogspot.com/2007/02/do-not-forget-to-check-for-null-null.html' title='Do not forget to check for NULL [NULL + int in postgres results in NULL]'/><author><name>NextGenSearch</name><email>noreply@blogger.com</email><gd:image rel='http://schemas.google.com/g/2005#thumbnail' width='16' height='16' src='http://img2.blogblog.com/img/b16-rounded.gif'/></author><thr:total>0</thr:total></entry><entry><id>tag:blogger.com,1999:blog-33306823.post-2539710151406280770</id><published>2007-01-30T19:51:00.000-08:00</published><updated>2007-01-30T19:52:30.715-08:00</updated><category scheme='http://www.blogger.com/atom/ns#' term='postgres'/><title type='text'>Moving a live database to a different directory</title><content type='html'>i want to move the running postgresql to a different directory any idea on how 2 do it&lt;br /&gt;as it is running outta space&lt;br /&gt;only 200mb left in 5gig&lt;br /&gt;and in a day that ll be filled&lt;br /&gt;&lt;br /&gt;&lt;span style="font-style: italic;"&gt;well, &lt;/span&gt;&lt;br /&gt;&lt;span style="font-style: italic;"&gt;a database can be created to use a tablespace &lt;/span&gt;&lt;br /&gt;&lt;span style="font-style: italic;"&gt;where the tablespace can be another mounted volume&lt;/span&gt;&lt;br /&gt;&lt;br /&gt;&lt;span style="font-style: italic;"&gt;you want to make sure the volume is never not mounted , such as have it mounted in /etc/fstab&lt;/span&gt;&lt;br /&gt;&lt;br /&gt;well, lets say you go buy a nice new 80gb drive&lt;br /&gt;and format it with ext3 or what ever you like to do&lt;br /&gt;and hook it up in /etc/fstab so that it is /u01 or some path you like a lot&lt;br /&gt;and then in postgresql&lt;br /&gt;create tablespace something path '/u01/myfolder/mytablespace'&lt;br /&gt;then you have to do something to alter database set tablespace , so that it uses that&lt;br /&gt;hmm, i've never actually migrated an existing database to a different tablespace,&lt;br /&gt;but i think it must be possible to move 
things from one tablespace to another one, so that it will physically sit on the new drive for us.&lt;br /&gt;http://www.postgresql.org/docs/8.1/interactive/sql-altertable.html&lt;br /&gt;SET TABLESPACE&lt;br /&gt;This form changes the table's tablespace to the specified tablespace and moves the data file(s) associated with the table to the new tablespace. Indexes on the table, if any, are not moved; but they can be moved separately with additional SET TABLESPACE commands. See also CREATE TABLESPACE.&lt;div class="blogger-post-footer"&gt;&lt;img width='1' height='1' src='https://blogger.googleusercontent.com/tracker/33306823-2539710151406280770?l=pylab.blogspot.com' alt='' /&gt;&lt;/div&gt;</content><link rel='replies' type='application/atom+xml' href='http://pylab.blogspot.com/feeds/2539710151406280770/comments/default' title='Post Comments'/><link rel='replies' type='text/html' href='http://www.blogger.com/comment.g?blogID=33306823&amp;postID=2539710151406280770' title='1 Comments'/><link rel='edit' type='application/atom+xml' href='http://www.blogger.com/feeds/33306823/posts/default/2539710151406280770'/><link rel='self' type='application/atom+xml' href='http://www.blogger.com/feeds/33306823/posts/default/2539710151406280770'/><link rel='alternate' type='text/html' href='http://pylab.blogspot.com/2007/01/moving-live-database-to-different.html' title='Moving a live database to a different directory'/><author><name>NextGenSearch</name><email>noreply@blogger.com</email><gd:image rel='http://schemas.google.com/g/2005#thumbnail' width='16' height='16' src='http://img2.blogblog.com/img/b16-rounded.gif'/></author><thr:total>1</thr:total></entry><entry><id>tag:blogger.com,1999:blog-33306823.post-6042136998522694731</id><published>2007-01-30T12:48:00.001-08:00</published><updated>2007-01-30T12:48:18.449-08:00</updated><category scheme='http://www.blogger.com/atom/ns#' term='postgres'/><title type='text'>Application Design for PostgreSQL Performance</title><content 
type='html'>Query Writing Rules&lt;br /&gt;&lt;br /&gt;For all database management systems (DBMSes), "round-trip" time is significant. This is the amount of time which it takes a query to get through the language parser, the driver, across the network interface, the database parser, the planner, the executor, the parser again, back across the network interface, through the driver data handler, and to the client application. DBMSes vary in the amount of time and CPU they take to process this cycle, and for a variety of reasons PostgreSQL is at the high end of time and system resources per round-trip.&lt;br /&gt;&lt;br /&gt;Further, PostgreSQL has significant per-transaction overhead, including log output and visibility rules which need to be set with each transaction. While you may think that you are not using transactions for a singleton read-only SELECT statement, in fact every single statement in PostgreSQL is in a transaction. In the absence of an explicit transaction, the statement itself is an implicit transaction.&lt;br /&gt;&lt;br /&gt;Offsetting this, PostgreSQL is only barely second to Oracle in processing large complex queries, and has the capability to handle complex multi-statement transactions with overlapping concurrency conflicts with ease. We also support cursors, both scrollable and non-scrollable.&lt;br /&gt;&lt;br /&gt;Tip 1: Never use many small selects when one big query could do the job.&lt;br /&gt;&lt;br /&gt;It's common in MySQL applications to handle joins in the application code; that is, by querying the ID from the parent record and then looping through the child records with that ID manually. This can result in running hundreds or thousands of queries per user interface screen. Each of these queries carries 2-6 milliseconds of round-trip time, which doesn't seem significant until you add it up for 1000 queries, at which point you're losing 3-5 seconds to round trip time. 
Comparatively, retrieving all of those records in a single query only takes a few hundred milliseconds, a time savings of 80%.&lt;br /&gt;&lt;br /&gt;Tip 2: Group many small UPDATES, INSERTS or DELETEs into large statements, or failing that, large transactions.&lt;br /&gt;&lt;br /&gt;First, the lack of subselects in early versions of MySQL has caused application developers to design their data modification statements (DML) in much the same way as joins-in-middleware. This is also a bad approach for PostgreSQL. Instead, you want to take advantage of subselects and joins in your UPDATE, INSERT and DELETE statements to try to modify batches in a single statement. This reduces round-trip time and transaction overhead.&lt;br /&gt;&lt;br /&gt;In some cases, however, there is no single query which can write all the rows you want and you have to use a bunch of serial statements. In this case, you want to make sure to wrap your series of DML statements in an explicit transaction (e.g. BEGIN; UPDATE; UPDATE; UPDATE; COMMIT;). This reduces transaction overhead and can cut execution time by as much as 50%.&lt;br /&gt;&lt;br /&gt;Tip 3: Consider bulk loading instead of serial INSERTS&lt;br /&gt;&lt;br /&gt;PostgreSQL provides a bulk loading mechanism called COPY, which takes tab-delimited or CSV input from a file or pipe. 
Where COPY can be used instead of hundreds or thousands of INSERTS, it can cut execution time by up to 75%.&lt;div class="blogger-post-footer"&gt;&lt;img width='1' height='1' src='https://blogger.googleusercontent.com/tracker/33306823-6042136998522694731?l=pylab.blogspot.com' alt='' /&gt;&lt;/div&gt;</content><link rel='replies' type='application/atom+xml' href='http://pylab.blogspot.com/feeds/6042136998522694731/comments/default' title='Post Comments'/><link rel='replies' type='text/html' href='http://www.blogger.com/comment.g?blogID=33306823&amp;postID=6042136998522694731' title='0 Comments'/><link rel='edit' type='application/atom+xml' href='http://www.blogger.com/feeds/33306823/posts/default/6042136998522694731'/><link rel='self' type='application/atom+xml' href='http://www.blogger.com/feeds/33306823/posts/default/6042136998522694731'/><link rel='alternate' type='text/html' href='http://pylab.blogspot.com/2007/01/application-design-for-postgresql.html' title='Application Design for PostgreSQL Performance'/><author><name>NextGenSearch</name><email>noreply@blogger.com</email><gd:image rel='http://schemas.google.com/g/2005#thumbnail' width='16' height='16' src='http://img2.blogblog.com/img/b16-rounded.gif'/></author><thr:total>0</thr:total></entry><entry><id>tag:blogger.com,1999:blog-33306823.post-2895328995807652245</id><published>2007-01-30T12:23:00.000-08:00</published><updated>2007-01-30T12:24:09.517-08:00</updated><category scheme='http://www.blogger.com/atom/ns#' term='dictionary python'/><title type='text'>Create a dictionary with these variables such that the keys are the variable names and the corresponding values are the variable values</title><content type='html'>&gt; i have few variables and i want to create a dictionary with these&lt;br /&gt;&gt; variables&lt;br /&gt;&gt; such that the keys are the variable names and the corresponding values&lt;br /&gt;&gt; are the variable values.how do i do this easily?&lt;br /&gt;&gt; for ex:&lt;br /&gt;&gt; 
var1='mark'&lt;br /&gt;&gt; var2=['1','2','3']&lt;br /&gt;&gt; my_dict = create_my_dictionary(var1, var2)&lt;br /&gt;&gt;&lt;br /&gt;&gt; and my_dict is {'var1':'mark', 'var2':['1','2','3']}&lt;br /&gt;&gt;&lt;br /&gt;&gt; is there a function for create_my_dictionary?&lt;br /&gt;&lt;br /&gt;var1='mark'&lt;br /&gt;var2=['1','2','3']&lt;br /&gt;my_dict = dict(var1=var1, var2=var2)&lt;br /&gt;&lt;br /&gt;In addition, if you are inside a function, and these are the only&lt;br /&gt;variables, using locals() may be useful too:&lt;br /&gt;&lt;br /&gt;def f():&lt;br /&gt;   a = 1&lt;br /&gt;   b = 2&lt;br /&gt;   c = locals()&lt;br /&gt;   print c&lt;br /&gt;   d = 3&lt;br /&gt;&lt;br /&gt;prints {'a': 1, 'b': 2}&lt;div class="blogger-post-footer"&gt;&lt;img width='1' height='1' src='https://blogger.googleusercontent.com/tracker/33306823-2895328995807652245?l=pylab.blogspot.com' alt='' /&gt;&lt;/div&gt;</content><link rel='replies' type='application/atom+xml' href='http://pylab.blogspot.com/feeds/2895328995807652245/comments/default' title='Post Comments'/><link rel='replies' type='text/html' href='http://www.blogger.com/comment.g?blogID=33306823&amp;postID=2895328995807652245' title='0 Comments'/><link rel='edit' type='application/atom+xml' href='http://www.blogger.com/feeds/33306823/posts/default/2895328995807652245'/><link rel='self' type='application/atom+xml' href='http://www.blogger.com/feeds/33306823/posts/default/2895328995807652245'/><link rel='alternate' type='text/html' href='http://pylab.blogspot.com/2007/01/create-dictionary-with-these-variables.html' title='Create a dictionary with these variables such that the keys are the variable names and the corresponding values are the variable values'/><author><name>NextGenSearch</name><email>noreply@blogger.com</email><gd:image rel='http://schemas.google.com/g/2005#thumbnail' width='16' height='16' 
src='http://img2.blogblog.com/img/b16-rounded.gif'/></author><thr:total>0</thr:total></entry><entry><id>tag:blogger.com,1999:blog-33306823.post-8618969408957627184</id><published>2007-01-30T02:45:00.000-08:00</published><updated>2007-01-30T02:46:13.208-08:00</updated><category scheme='http://www.blogger.com/atom/ns#' term='python linuxvirtualserver'/><title type='text'>Tools used by Youtube</title><content type='html'>http://www.erg.abdn.ac.uk/users/gorry/course/inet-pages/arp.html&lt;br /&gt;http://isc.org/sw/bind/&lt;br /&gt;http://www.postfix.org/&lt;br /&gt;http://www.gnu.org/software/cfengine/&lt;br /&gt;http://www.google.com/search?lr=&amp;ie=UTF-8&amp;oe=UTF-8&amp;q=AWK%2C&lt;br /&gt;http://www.linuxvirtualserver.org/&lt;br /&gt;http://www.google.com/search?lr=&amp;ie=UTF-8&amp;oe=UTF-8&amp;q=LVM%2C&lt;br /&gt;System Architect&lt;br /&gt;&lt;br /&gt;We're looking for a superstar operations-minded person to work on a myriad of vital infrastructure-related projects and handle daily site issues. The range of projects is truly huge and ranges from enhancing site monitoring to helping design and implement our ever-growing site infrastructure. If LVM, LVS, AWK, SVN, and ARP are more than TLAs for you, contact us!&lt;br /&gt;Required Skills and Experience&lt;br /&gt;At least 3 years experience with all of the following: HTML/DHTML, Javascript, Ajax, CSS, Python,&lt;br /&gt;    * Experience in managing and scaling a large set of systems.&lt;br /&gt;    * Working knowledge of: Linux, TCP/IP networking, security, mail, file systems.&lt;br /&gt;    * Scripting (bash, Perl, Python, etc.).&lt;br /&gt;    * BS in Computer Science or equivalent experience.&lt;br /&gt;    * Versatility. 
Must be able to pick up new skills / projects quickly.&lt;br /&gt;&lt;br /&gt;Preferred Experience&lt;br /&gt;&lt;br /&gt;    * RAID&lt;br /&gt;    * Load balancing (hardware and/or software)&lt;br /&gt;    * Postfix&lt;br /&gt;    * BIND&lt;br /&gt;    * cfengine&lt;br /&gt;    * Apache, lighttpd&lt;br /&gt;    * Site monitoring tools, such as Nagios&lt;br /&gt;&lt;br /&gt;To apply, please email a cover letter and resume (plain text, HTML, or PDF) to jobs@youtube.com. The subject line MUST include: "Job: System Architect".&lt;br /&gt;&lt;br /&gt;Purely as a bonus, please send us the decoded version of this text: ORUGKIDBNZZXOZLSEBUXGIBUGI&lt;div class="blogger-post-footer"&gt;&lt;img width='1' height='1' src='https://blogger.googleusercontent.com/tracker/33306823-8618969408957627184?l=pylab.blogspot.com' alt='' /&gt;&lt;/div&gt;</content><link rel='replies' type='application/atom+xml' href='http://pylab.blogspot.com/feeds/8618969408957627184/comments/default' title='Post Comments'/><link rel='replies' type='text/html' href='http://www.blogger.com/comment.g?blogID=33306823&amp;postID=8618969408957627184' title='0 Comments'/><link rel='edit' type='application/atom+xml' href='http://www.blogger.com/feeds/33306823/posts/default/8618969408957627184'/><link rel='self' type='application/atom+xml' href='http://www.blogger.com/feeds/33306823/posts/default/8618969408957627184'/><link rel='alternate' type='text/html' href='http://pylab.blogspot.com/2007/01/tools-used-by-youtube.html' title='Tools used by Youtube'/><author><name>NextGenSearch</name><email>noreply@blogger.com</email><gd:image rel='http://schemas.google.com/g/2005#thumbnail' width='16' height='16' src='http://img2.blogblog.com/img/b16-rounded.gif'/></author><thr:total>0</thr:total></entry><entry><id>tag:blogger.com,1999:blog-33306823.post-4590688483362133002</id><published>2007-01-27T21:19:00.000-08:00</published><updated>2007-01-27T21:20:07.536-08:00</updated><category scheme='http://www.blogger.com/atom/ns#' 
term='postgres'/><title type='text'>HOWTO install tsearch2 for mediawiki on postgres (wikipgedia)</title><content type='html'>I just got wikipgedia 0.0.4 installed and working, and it is running&lt;br /&gt;sweeter than a horses arse at the Ascot races.  tsearch2 gave me some&lt;br /&gt;troubles, but they now seem to be resolved.  For those of you new to&lt;br /&gt;postgres or tsearch, I'll show you how I did it.&lt;br /&gt;&lt;br /&gt;This HOWTO assumes the following:&lt;br /&gt;&lt;br /&gt;You are running Debian unstable and you have the Postgres 8.1 client,&lt;br /&gt;server, and contrib packages installed and running.&lt;br /&gt;&lt;br /&gt;Create your database "wikidb" with owner "wikiadmin" and the schema&lt;br /&gt;"mediawiki".&lt;br /&gt;&lt;br /&gt;$ su - postgres -c "psql template1"&lt;br /&gt;template1=# CREATE USER wikiadmin WITH PASSWORD "somepassword";&lt;br /&gt;template1=# CREATE DATABASE wikidb WITH OWNER wikiadmin;&lt;br /&gt;template1=# \c wikidb&lt;br /&gt;wikidb=# CREATE SCHEMA mediawiki;&lt;br /&gt;wikidb=# \i /usr/share/postgresql/8.1/contrib/tsearch2.sql&lt;br /&gt;&lt;br /&gt;The \c command in psql connects you to the wikidb database.&lt;br /&gt;&lt;br /&gt;The \i command in psql "includes" the named file, executing all the SQL&lt;br /&gt;commands in the file as if you had typed them in.&lt;br /&gt;&lt;br /&gt;Notice we didn't give the "wikiadmin" account superuser powers inside&lt;br /&gt;postgres.  For security reasons, I don't recommend it. So you need to&lt;br /&gt;install tsearch2 into the wikidb database as the user "postgres", the&lt;br /&gt;default superuser account.  The commands above accomplish that for you.&lt;br /&gt;&lt;br /&gt;When I did this I noticed various errors that worried me.  Everything&lt;br /&gt;seems to work, but I'd prefer a version of tsearch2 that didn't spit out&lt;br /&gt;those errors during install.  
They seem harmless, so I will reproduce&lt;br /&gt;them here, in case any googler shares my anxiety:&lt;br /&gt;&lt;br /&gt;### TSORT2 INSTALL ERROR MESSAGES ###&lt;br /&gt;psql:/usr/share/postgresql/8.1/contrib/tsearch2.sql:13:&lt;br /&gt;      NOTICE: CREATE TABLE / PRIMARY KEY will create&lt;br /&gt;      implicit index "pg_ts_dict_pkey" for table "pg_ts_dict"&lt;br /&gt;psql:/usr/share/postgresql/8.1/contrib/tsearch2.sql:145: &lt;br /&gt;      NOTICE: CREATE TABLE / PRIMARY KEY will create &lt;br /&gt;      implicit index "pg_ts_parser_pkey" for table "pg_ts_parser"&lt;br /&gt;psql:/usr/share/postgresql/8.1/contrib/tsearch2.sql:244: &lt;br /&gt;      NOTICE: CREATE TABLE / PRIMARY KEY will create &lt;br /&gt;      implicit index "pg_ts_cfg_pkey" for table "pg_ts_cfg"&lt;br /&gt;psql:/usr/share/postgresql/8.1/contrib/tsearch2.sql:251: &lt;br /&gt;      NOTICE: CREATE TABLE / PRIMARY KEY will create &lt;br /&gt;      implicit index "pg_ts_cfgmap_pkey" for table "pg_ts_cfgmap"&lt;br /&gt;psql:/usr/share/postgresql/8.1/contrib/tsearch2.sql:337: &lt;br /&gt;      NOTICE: type "tsvector" is not yet defined&lt;br /&gt;psql:/usr/share/postgresql/8.1/contrib/tsearch2.sql:342: &lt;br /&gt;      NOTICE: argument type tsvector is only a shell&lt;br /&gt;psql:/usr/share/postgresql/8.1/contrib/tsearch2.sql:396: &lt;br /&gt;      NOTICE: type "tsquery" is not yet defined&lt;br /&gt;psql:/usr/share/postgresql/8.1/contrib/tsearch2.sql:401: &lt;br /&gt;      NOTICE: argument type tsquery is only a shell&lt;br /&gt;psql:/usr/share/postgresql/8.1/contrib/tsearch2.sql:543: &lt;br /&gt;      NOTICE: type "gtsvector" is not yet defined&lt;br /&gt;psql:/usr/share/postgresql/8.1/contrib/tsearch2.sql:548: &lt;br /&gt;      NOTICE: argument type gtsvector is only a shell&lt;br /&gt;### END OF ERROR MESSAGES ###&lt;br /&gt;&lt;br /&gt;Once tsearch2 was installed, I went through the steps to getting&lt;br /&gt;wikipgedia set up through the web browser.  It seemed to work.  
The main&lt;br /&gt;page popped up.  I clicked the Edit link.  Clicked the Save button.&lt;br /&gt;&lt;br /&gt;Horror!&lt;br /&gt;&lt;br /&gt;### EDIT ERROR MESSAGES ###&lt;br /&gt;Warning: pg_query(): Query failed: ERROR: column "si_title" of relation&lt;br /&gt;"searchindex" does not exist in&lt;br /&gt;/my/path/to/html/wiki/includes/DatabasePostgreSQL.php on line 98&lt;br /&gt;A database error has occurred Query: INSERT INTO searchindex&lt;br /&gt;(si_page,si_title,si_text) VALUES ( 1, to_tsvector('main&lt;br /&gt;page'),to_tsvector(' wiki software successfully installed please see&lt;br /&gt;documentation on customizing the interface and the user user''s guide&lt;br /&gt;for usage and configuration help test test test second test; see her&lt;br /&gt;knickers in a knot sh bm bm bm one more time for the howto ')) Function:&lt;br /&gt;SearchTsearch2:update Error: 1 ERROR: column "si_title" of relation&lt;br /&gt;"searchindex" does not exist&lt;br /&gt;&lt;br /&gt;Backtrace:&lt;br /&gt;&lt;br /&gt;      * GlobalFunctions.php line 500 calls wfbacktrace()&lt;br /&gt;      * DatabasePostgreSQL.php line 573 calls wfdebugdiebacktrace()&lt;br /&gt;      * Database.php line 383 calls databasepostgresql::reportqueryerror()&lt;br /&gt;      * SearchTsearch2.php line 116 calls databasepostgresql::query()&lt;br /&gt;      * SearchUpdate.php line 103 calls searchtsearch2::update()&lt;br /&gt;      * index.php line 270 calls searchupdate::doupdate()&lt;br /&gt;### END OF EDIT ERROR MESSAGES ###&lt;br /&gt;&lt;br /&gt;Finally, a tsearch2 webpage mentioned something about granting search&lt;br /&gt;privileges to some of the tsearch2 tables.  
So I did this, first&lt;br /&gt;assuming the powers of the postgres account:&lt;br /&gt;&lt;br /&gt;$ su - postgres -c "psql wikidb"&lt;br /&gt;wikidb=# GRANT SELECT ON pg_ts_dict to wikiadmin;&lt;br /&gt;wikidb=# GRANT SELECT ON pg_ts_parser to wikiadmin;&lt;br /&gt;wikidb=# GRANT SELECT ON pg_ts_cfg to wikiadmin;&lt;br /&gt;wikidb=# GRANT SELECT ON pg_ts_cfgmap to wikiadmin;&lt;br /&gt;&lt;br /&gt;After doing this, wikipgedia worked like a champ.  Edit was fast and&lt;br /&gt;snappy and gave no errors.&lt;br /&gt;&lt;br /&gt;Kudos to the wikipgedia team.  If only every software package was so&lt;br /&gt;easy to install.  A pity I am no longer able to package it up and&lt;br /&gt;include it in Debian.&lt;div class="blogger-post-footer"&gt;&lt;img width='1' height='1' src='https://blogger.googleusercontent.com/tracker/33306823-4590688483362133002?l=pylab.blogspot.com' alt='' /&gt;&lt;/div&gt;</content><link rel='replies' type='application/atom+xml' href='http://pylab.blogspot.com/feeds/4590688483362133002/comments/default' title='Post Comments'/><link rel='replies' type='text/html' href='http://www.blogger.com/comment.g?blogID=33306823&amp;postID=4590688483362133002' title='2 Comments'/><link rel='edit' type='application/atom+xml' href='http://www.blogger.com/feeds/33306823/posts/default/4590688483362133002'/><link rel='self' type='application/atom+xml' href='http://www.blogger.com/feeds/33306823/posts/default/4590688483362133002'/><link rel='alternate' type='text/html' href='http://pylab.blogspot.com/2007/01/howto-install-tsearch2-for-mediawiki-on.html' title='HOWTO install tsearch2 for mediawiki on postgres (wikipgedia)'/><author><name>NextGenSearch</name><email>noreply@blogger.com</email><gd:image rel='http://schemas.google.com/g/2005#thumbnail' width='16' height='16' 
src='http://img2.blogblog.com/img/b16-rounded.gif'/></author><thr:total>2</thr:total></entry><entry><id>tag:blogger.com,1999:blog-33306823.post-3455164788764479941</id><published>2007-01-27T20:05:00.000-08:00</published><updated>2007-01-27T20:06:16.876-08:00</updated><title type='text'>urlquote post data</title><content type='html'>never urlquote a string when passing it by POST method. does not work.&lt;div class="blogger-post-footer"&gt;&lt;img width='1' height='1' src='https://blogger.googleusercontent.com/tracker/33306823-3455164788764479941?l=pylab.blogspot.com' alt='' /&gt;&lt;/div&gt;</content><link rel='replies' type='application/atom+xml' href='http://pylab.blogspot.com/feeds/3455164788764479941/comments/default' title='Post Comments'/><link rel='replies' type='text/html' href='http://www.blogger.com/comment.g?blogID=33306823&amp;postID=3455164788764479941' title='0 Comments'/><link rel='edit' type='application/atom+xml' href='http://www.blogger.com/feeds/33306823/posts/default/3455164788764479941'/><link rel='self' type='application/atom+xml' href='http://www.blogger.com/feeds/33306823/posts/default/3455164788764479941'/><link rel='alternate' type='text/html' href='http://pylab.blogspot.com/2007/01/urlquote-post-data.html' title='urlquote post data'/><author><name>NextGenSearch</name><email>noreply@blogger.com</email><gd:image rel='http://schemas.google.com/g/2005#thumbnail' width='16' height='16' src='http://img2.blogblog.com/img/b16-rounded.gif'/></author><thr:total>0</thr:total></entry><entry><id>tag:blogger.com,1999:blog-33306823.post-6853824382127723916</id><published>2007-01-27T13:32:00.000-08:00</published><updated>2007-01-27T13:34:41.385-08:00</updated><category scheme='http://www.blogger.com/atom/ns#' term='postgres'/><title type='text'>Invoking psql -c from bash for command to run in postgres</title><content type='html'>made some pretty neat bash shell scripts that interact with postgresql database just from using psql&lt;br /&gt;&lt;br 
/&gt;#!/bin/sh &lt;br /&gt; &lt;br /&gt;#variable for common things postgresql needs &lt;br /&gt;PSQL="/usr/bin/psql -q -t -h 192.168.1.4 -U thein -c "; &lt;br /&gt; &lt;br /&gt;function log_mail() { &lt;br /&gt;        ip="${1}"; &lt;br /&gt;        get_country "${ip}"; &lt;br /&gt;        timestamp="`date +"%Y-%m-%d %H:%M:%S"`"; &lt;br /&gt;        result="`${PSQL} "set search_path=net; select ip_block_log_save('${timestamp}', '${ip}', '${country}', false);"`"; &lt;br /&gt;} &lt;br /&gt;so this is a crusty old example, but i am doing a psql call to invoke ip_block_log_save() stored procedure, from inside a bash shell&lt;br /&gt;there are other goodies there too&lt;br /&gt;but the general idea is it is part of my incoming SMTP email server, and it compares the from address to a blacklist in a postgresql database.&lt;br /&gt;psql -q -t -h thehost -U theuser -c "select command here";&lt;br /&gt;that type of command line parameters suppress the formatting of psql output;&lt;br /&gt;so it is a low level basic way to have shell scripts to work with databases, by using the psql command with -c "command to run" option&lt;div class="blogger-post-footer"&gt;&lt;img width='1' height='1' src='https://blogger.googleusercontent.com/tracker/33306823-6853824382127723916?l=pylab.blogspot.com' alt='' /&gt;&lt;/div&gt;</content><link rel='replies' type='application/atom+xml' href='http://pylab.blogspot.com/feeds/6853824382127723916/comments/default' title='Post Comments'/><link rel='replies' type='text/html' href='http://www.blogger.com/comment.g?blogID=33306823&amp;postID=6853824382127723916' title='0 Comments'/><link rel='edit' type='application/atom+xml' href='http://www.blogger.com/feeds/33306823/posts/default/6853824382127723916'/><link rel='self' type='application/atom+xml' href='http://www.blogger.com/feeds/33306823/posts/default/6853824382127723916'/><link rel='alternate' type='text/html' href='http://pylab.blogspot.com/2007/01/invoking-psql-c-from-bash-for-command.html' 
title='Invoking psql -c from bash for command to run in postgres'/><author><name>NextGenSearch</name><email>noreply@blogger.com</email><gd:image rel='http://schemas.google.com/g/2005#thumbnail' width='16' height='16' src='http://img2.blogblog.com/img/b16-rounded.gif'/></author><thr:total>0</thr:total></entry><entry><id>tag:blogger.com,1999:blog-33306823.post-4747662459463093287</id><published>2007-01-27T02:31:00.001-08:00</published><updated>2007-01-27T02:32:43.593-08:00</updated><category scheme='http://www.blogger.com/atom/ns#' term='memcached'/><title type='text'>Using memcached as a session store</title><content type='html'>I don't know about Flup. Remember that you have limitations on the&lt;br /&gt;amount of data that you can put into a cookie. Furthermore, you have&lt;br /&gt;to be careful because the user can hack his own cookies. Hence, if&lt;br /&gt;you're going to put anything more than a session ID, you should&lt;br /&gt;encrypt and sign the cookie. If you have lots of session data, you&lt;br /&gt;should instead put your session data on a separate server that all the&lt;br /&gt;Web servers can access. memcached is often used for this task. 
- Shannon Behrens&lt;div class="blogger-post-footer"&gt;&lt;img width='1' height='1' src='https://blogger.googleusercontent.com/tracker/33306823-4747662459463093287?l=pylab.blogspot.com' alt='' /&gt;&lt;/div&gt;</content><link rel='replies' type='application/atom+xml' href='http://pylab.blogspot.com/feeds/4747662459463093287/comments/default' title='Post Comments'/><link rel='replies' type='text/html' href='http://www.blogger.com/comment.g?blogID=33306823&amp;postID=4747662459463093287' title='0 Comments'/><link rel='edit' type='application/atom+xml' href='http://www.blogger.com/feeds/33306823/posts/default/4747662459463093287'/><link rel='self' type='application/atom+xml' href='http://www.blogger.com/feeds/33306823/posts/default/4747662459463093287'/><link rel='alternate' type='text/html' href='http://pylab.blogspot.com/2007/01/using-memcached-as-session-store.html' title='Using memcached as a session store'/><author><name>NextGenSearch</name><email>noreply@blogger.com</email><gd:image rel='http://schemas.google.com/g/2005#thumbnail' width='16' height='16' src='http://img2.blogblog.com/img/b16-rounded.gif'/></author><thr:total>0</thr:total></entry><entry><id>tag:blogger.com,1999:blog-33306823.post-2585180203726747657</id><published>2007-01-26T03:08:00.001-08:00</published><updated>2007-01-26T03:09:02.961-08:00</updated><category scheme='http://www.blogger.com/atom/ns#' term='lighttpd'/><title type='text'>lighttpd fastcgi quirk</title><content type='html'>Make sure mod_fastcgi is in your modules list, somewhere after mod_rewrite and mod_access, but not after mod_accesslog. 
You’ll probably want mod_alias as well, for serving admin media.&lt;div class="blogger-post-footer"&gt;&lt;img width='1' height='1' src='https://blogger.googleusercontent.com/tracker/33306823-2585180203726747657?l=pylab.blogspot.com' alt='' /&gt;&lt;/div&gt;</content><link rel='replies' type='application/atom+xml' href='http://pylab.blogspot.com/feeds/2585180203726747657/comments/default' title='Post Comments'/><link rel='replies' type='text/html' href='http://www.blogger.com/comment.g?blogID=33306823&amp;postID=2585180203726747657' title='0 Comments'/><link rel='edit' type='application/atom+xml' href='http://www.blogger.com/feeds/33306823/posts/default/2585180203726747657'/><link rel='self' type='application/atom+xml' href='http://www.blogger.com/feeds/33306823/posts/default/2585180203726747657'/><link rel='alternate' type='text/html' href='http://pylab.blogspot.com/2007/01/lighttpd-fastcgi-quirk.html' title='lighttpd fastcgi quirk'/><author><name>NextGenSearch</name><email>noreply@blogger.com</email><gd:image rel='http://schemas.google.com/g/2005#thumbnail' width='16' height='16' src='http://img2.blogblog.com/img/b16-rounded.gif'/></author><thr:total>0</thr:total></entry><entry><id>tag:blogger.com,1999:blog-33306823.post-2908113184793132144</id><published>2007-01-25T19:54:00.000-08:00</published><updated>2007-01-25T19:55:34.824-08:00</updated><title type='text'>nginx file locations</title><content type='html'>Configuration summary&lt;br /&gt;  + threads are not used&lt;br /&gt;  + using system PCRE library&lt;br /&gt;  + OpenSSL library is not used&lt;br /&gt;  + md5 library is not used&lt;br /&gt;  + sha1 library is not used&lt;br /&gt;  + using system zlib library&lt;br /&gt;&lt;br /&gt;  nginx path prefix: "/usr/local/nginx"&lt;br /&gt;  nginx binary file: "/usr/local/nginx/sbin/nginx"&lt;br /&gt;  nginx configuration file: "/usr/local/nginx/conf/nginx.conf"&lt;br /&gt;  nginx pid file: "/usr/local/nginx/logs/nginx.pid"&lt;br /&gt;  nginx error log file: 
"/usr/local/nginx/logs/error.log"&lt;br /&gt;  nginx http access log file: "/usr/local/nginx/logs/access.log"&lt;br /&gt;  nginx http client request body temporary files: "/usr/local/nginx/client_body_temp"&lt;br /&gt;  nginx http proxy temporary files: "/usr/local/nginx/proxy_temp"&lt;br /&gt;  nginx http fastcgi temporary files: "/usr/local/nginx/fastcgi_temp"&lt;div class="blogger-post-footer"&gt;&lt;img width='1' height='1' src='https://blogger.googleusercontent.com/tracker/33306823-2908113184793132144?l=pylab.blogspot.com' alt='' /&gt;&lt;/div&gt;</content><link rel='replies' type='application/atom+xml' href='http://pylab.blogspot.com/feeds/2908113184793132144/comments/default' title='Post Comments'/><link rel='replies' type='text/html' href='http://www.blogger.com/comment.g?blogID=33306823&amp;postID=2908113184793132144' title='0 Comments'/><link rel='edit' type='application/atom+xml' href='http://www.blogger.com/feeds/33306823/posts/default/2908113184793132144'/><link rel='self' type='application/atom+xml' href='http://www.blogger.com/feeds/33306823/posts/default/2908113184793132144'/><link rel='alternate' type='text/html' href='http://pylab.blogspot.com/2007/01/nginx-file-locations.html' title='nginx file locations'/><author><name>NextGenSearch</name><email>noreply@blogger.com</email><gd:image rel='http://schemas.google.com/g/2005#thumbnail' width='16' height='16' src='http://img2.blogblog.com/img/b16-rounded.gif'/></author><thr:total>0</thr:total></entry><entry><id>tag:blogger.com,1999:blog-33306823.post-5385798193400261884</id><published>2007-01-22T21:57:00.000-08:00</published><updated>2007-01-22T22:08:12.382-08:00</updated><title type='text'>building flex, corelib with ant, json example</title><content type='html'>get flex binaries and unzip it to a directory: flex&lt;br /&gt;install jdk 1.5 and set &lt;br /&gt;export JAVA_HOME=/usr/java/jdk1.5.0_10&lt;br /&gt;export PATH=$JAVA_HOME/bin:$PATH&lt;br /&gt;&lt;br /&gt;&lt;br /&gt;go to directory flex and 
build samples :&lt;br /&gt;samples/build-samples.sh&lt;br /&gt;&lt;br /&gt;it takes a while to build.&lt;br /&gt;now get as3corelib from google code svn&lt;br /&gt;svn co http://as3corelib.googlecode.com/svn/trunk/ as3corelib&lt;br /&gt;&lt;br /&gt;need ant to build this&lt;br /&gt;sudo yum install ant&lt;br /&gt;&lt;br /&gt;edit as3corelib/build/build.properties and set right directories:&lt;br /&gt;flex2sdk.bin.dir = /www/ask/work/flex/bin&lt;br /&gt;flex2sdk.lib.dir = /www/ask/work/flex/frameworks/libs&lt;br /&gt;flex2sdk.locale.dir = /www/ask/work/flex/frameworks/locale/{locale}&lt;br /&gt;&lt;br /&gt;&lt;br /&gt;in directory as3corelib&lt;br /&gt;ant -f build/build.xml lib&lt;br /&gt;&lt;br /&gt;cp as3corelib/bin/corelib.swc flex/frameworks/libs/.&lt;br /&gt;&lt;br /&gt;&lt;br /&gt;now compile JSONExample.mxml&lt;br /&gt;flex/bin/mxmlc JSONExample.mxml&lt;div class="blogger-post-footer"&gt;&lt;img width='1' height='1' src='https://blogger.googleusercontent.com/tracker/33306823-5385798193400261884?l=pylab.blogspot.com' alt='' /&gt;&lt;/div&gt;</content><link rel='replies' type='application/atom+xml' href='http://pylab.blogspot.com/feeds/5385798193400261884/comments/default' title='Post Comments'/><link rel='replies' type='text/html' href='http://www.blogger.com/comment.g?blogID=33306823&amp;postID=5385798193400261884' title='0 Comments'/><link rel='edit' type='application/atom+xml' href='http://www.blogger.com/feeds/33306823/posts/default/5385798193400261884'/><link rel='self' type='application/atom+xml' href='http://www.blogger.com/feeds/33306823/posts/default/5385798193400261884'/><link rel='alternate' type='text/html' href='http://pylab.blogspot.com/2007/01/building-flex-corelib-with-ant-json.html' title='building flex, corelib with ant, json example'/><author><name>NextGenSearch</name><email>noreply@blogger.com</email><gd:image rel='http://schemas.google.com/g/2005#thumbnail' width='16' height='16' 
src='http://img2.blogblog.com/img/b16-rounded.gif'/></author><thr:total>0</thr:total></entry><entry><id>tag:blogger.com,1999:blog-33306823.post-3207899312597121644</id><published>2007-01-22T00:07:00.000-08:00</published><updated>2007-01-22T00:09:47.330-08:00</updated><category scheme='http://www.blogger.com/atom/ns#' term='admin'/><title type='text'>VMWARE in linux 2.6.19-1.2895 Fedora Core 6</title><content type='html'>had compile errors for vmware compilation with kernel 2.6.19&lt;br /&gt;&lt;br /&gt;make[1]: Entering directory `/usr/src/kernels/linux-2.6.19'&lt;br /&gt;  CC [M]  /tmp/vmware-config0/vmnet-only/driver.o&lt;br /&gt;  CC [M]  /tmp/vmware-config0/vmnet-only/hub.o&lt;br /&gt;  CC [M]  /tmp/vmware-config0/vmnet-only/userif.o&lt;br /&gt;/tmp/vmware-config0/vmnet-only/userif.c: In function `VNetCopyDatagramToUser':&lt;br /&gt;/tmp/vmware-config0/vmnet-only/userif.c:629: error: `CHECKSUM_HW' undeclared (first use in this function)&lt;br /&gt;/tmp/vmware-config0/vmnet-only/userif.c:629: error: (Each undeclared identifieris reported only once&lt;br /&gt;/tmp/vmware-config0/vmnet-only/userif.c:629: error: for each function it appears in.)&lt;br /&gt;make[2]: *** [/tmp/vmware-config0/vmnet-only/userif.o] Error 1&lt;br /&gt;make[1]: *** [_module_/tmp/vmware-config0/vmnet-only] Error 2&lt;br /&gt;make[1]: Leaving directory `/usr/src/kernels/linux-2.6.19'&lt;br /&gt;make: *** [vmnet.ko] Error 2&lt;br /&gt;make: Leaving directory `/tmp/vmware-config0/vmnet-only'&lt;br /&gt;Unable to build the vmnet module.&lt;br /&gt;&lt;br /&gt;&lt;br /&gt;&lt;br /&gt;so after finding some hints ( http://www.vmware.com/community/thread.jspa?messageID=512232)&lt;br /&gt;&lt;br /&gt;I dit the foloing !! use on own RISK I dont now if this change is OK!! 
but it works&lt;br /&gt;&lt;br /&gt;cd /usr/lib/vmware/modules/source/&lt;br /&gt;cp vmnet.tar vmnet.tar_orig&lt;br /&gt;tar xf vmnet.tar&lt;br /&gt; vi vmnet-only/bridge.c (comment out First line and add second line)&lt;br /&gt;----------------------------&lt;br /&gt;   /*if (skb-&gt;ip_summed == CHECKSUM_HW) { */&lt;br /&gt;   if (skb-&gt;ip_summed == CHECKSUM_COMPLETE) {&lt;br /&gt;---------------------------&lt;br /&gt;vi vmnet-only/userif.c&lt;br /&gt;---------------------------&lt;br /&gt;   /*    skb-&gt;ip_summed == CHECKSUM_HW &amp;&amp;amp;       */      /* Without checksum */&lt;br /&gt;        skb-&gt;ip_summed ==&lt;span style="font-weight: bold;"&gt; CHECKSUM_PARTIAL&lt;/span&gt; &amp;&amp;amp;&lt;br /&gt;---------------------------&lt;br /&gt;&lt;br /&gt;tar cf vmnet.tar vmnet-only&lt;br /&gt;&lt;br /&gt;&lt;br /&gt;&lt;br /&gt;&lt;span style="color: rgb(192, 192, 192);"&gt;I also did this for an other error touch /usr/src/kernels/linux-2.6.19/include/linux/config.h [dont do this do the ln instead    ]&lt;/span&gt;&lt;br /&gt;&lt;br /&gt;&lt;span style="font-weight: bold;"&gt;sudo ln -s  autoconf.h config.h&lt;/span&gt;&lt;br /&gt;&lt;span style="font-weight: bold;"&gt;in &lt;/span&gt;&lt;br /&gt;&lt;span style="font-weight: bold;"&gt;/usr/src/kernels/2.6.19-1.2895.fc6-i686/include/linux&lt;/span&gt;&lt;br /&gt;After this everything else went fine&lt;div class="blogger-post-footer"&gt;&lt;img width='1' height='1' src='https://blogger.googleusercontent.com/tracker/33306823-3207899312597121644?l=pylab.blogspot.com' alt='' /&gt;&lt;/div&gt;</content><link rel='replies' type='application/atom+xml' href='http://pylab.blogspot.com/feeds/3207899312597121644/comments/default' title='Post Comments'/><link rel='replies' type='text/html' href='http://www.blogger.com/comment.g?blogID=33306823&amp;postID=3207899312597121644' title='0 Comments'/><link rel='edit' type='application/atom+xml' 
href='http://www.blogger.com/feeds/33306823/posts/default/3207899312597121644'/><link rel='self' type='application/atom+xml' href='http://www.blogger.com/feeds/33306823/posts/default/3207899312597121644'/><link rel='alternate' type='text/html' href='http://pylab.blogspot.com/2007/01/vmware-in-linux-2619-12895-fedora-core.html' title='VMWARE in linux 2.6.19-1.2895 Fedora Core 6'/><author><name>NextGenSearch</name><email>noreply@blogger.com</email><gd:image rel='http://schemas.google.com/g/2005#thumbnail' width='16' height='16' src='http://img2.blogblog.com/img/b16-rounded.gif'/></author><thr:total>0</thr:total></entry><entry><id>tag:blogger.com,1999:blog-33306823.post-5697570590782365623</id><published>2007-01-21T15:31:00.000-08:00</published><updated>2007-01-21T15:32:00.961-08:00</updated><title type='text'>fix rpm problems in fedora</title><content type='html'>rm -f /var/lib/rpm/__db*&lt;br /&gt; #db_verify /var/lib/rpm/Packages&lt;br /&gt; #rpm --rebuilddb&lt;div class="blogger-post-footer"&gt;&lt;img width='1' height='1' src='https://blogger.googleusercontent.com/tracker/33306823-5697570590782365623?l=pylab.blogspot.com' alt='' /&gt;&lt;/div&gt;</content><link rel='replies' type='application/atom+xml' href='http://pylab.blogspot.com/feeds/5697570590782365623/comments/default' title='Post Comments'/><link rel='replies' type='text/html' href='http://www.blogger.com/comment.g?blogID=33306823&amp;postID=5697570590782365623' title='0 Comments'/><link rel='edit' type='application/atom+xml' href='http://www.blogger.com/feeds/33306823/posts/default/5697570590782365623'/><link rel='self' type='application/atom+xml' href='http://www.blogger.com/feeds/33306823/posts/default/5697570590782365623'/><link rel='alternate' type='text/html' href='http://pylab.blogspot.com/2007/01/fix-rpm-problems-in-fedora.html' title='fix rpm problems in fedora'/><author><name>NextGenSearch</name><email>noreply@blogger.com</email><gd:image rel='http://schemas.google.com/g/2005#thumbnail' 
width='16' height='16' src='http://img2.blogblog.com/img/b16-rounded.gif'/></author><thr:total>0</thr:total></entry><entry><id>tag:blogger.com,1999:blog-33306823.post-2852922866994356842</id><published>2007-01-21T01:16:00.001-08:00</published><updated>2007-01-21T01:18:12.412-08:00</updated><category scheme='http://www.blogger.com/atom/ns#' term='python sets'/><title type='text'>Python Sets</title><content type='html'>http://www.linuxforums.org/programming/introduction_to_python__part_1.html&lt;br /&gt;Sets&lt;br /&gt;&lt;br /&gt;I think I don't really have to explain what a set is, as everyone should know them from mathematics. It's simply a pile of elements that do not have ordering and do not contain duplicates.&lt;br /&gt;&lt;br /&gt;A set has to be initialized with the elements of a list. Since you already know what a list is, we do this in one step. Just like with dictionaries, print can handle a set as it is.&lt;br /&gt;Once we have a set, I show the first useful feature of sets: testing whether an element is in the set.&lt;br /&gt;&lt;br /&gt;inventory_carpenter=set(['helmet', 'gloves', 'hammer'])&lt;br /&gt;print inventory_carpenter # outputs set(['helmet', 'hammer', 'gloves'])&lt;br /&gt;&lt;br /&gt;print 'gloves' in inventory_carpenter # outputs 'True'&lt;br /&gt;&lt;br /&gt;Since sets are interesting only if we have more than one of them, let's introduce another one! 
Once we have that, we can immediately see what are the elements that both sets contain (intersection).&lt;br /&gt;&lt;br /&gt;inventory_highscaler=set(['helmet', 'rope', 'harness', 'carabiner'])&lt;br /&gt;&lt;br /&gt;print inventory_carpenter &amp; inventory_highscaler # outputs 'set(['helmet'])'&lt;br /&gt;&lt;br /&gt;Similarly, we can have the &lt;span style="font-weight: bold;"&gt;union of sets ( using | ), difference ( using - ), or symmetric difference (using ^).&lt;/span&gt;&lt;br /&gt;For sets, you don't really need anything else, as you can do every meaningful operation using the ones above. For example, to add a new element, you can use union.&lt;br /&gt;&lt;br /&gt;inventory_carpenter = inventory_carpenter | set(['nails'])&lt;div class="blogger-post-footer"&gt;&lt;img width='1' height='1' src='https://blogger.googleusercontent.com/tracker/33306823-2852922866994356842?l=pylab.blogspot.com' alt='' /&gt;&lt;/div&gt;</content><link rel='replies' type='application/atom+xml' href='http://pylab.blogspot.com/feeds/2852922866994356842/comments/default' title='Post Comments'/><link rel='replies' type='text/html' href='http://www.blogger.com/comment.g?blogID=33306823&amp;postID=2852922866994356842' title='0 Comments'/><link rel='edit' type='application/atom+xml' href='http://www.blogger.com/feeds/33306823/posts/default/2852922866994356842'/><link rel='self' type='application/atom+xml' href='http://www.blogger.com/feeds/33306823/posts/default/2852922866994356842'/><link rel='alternate' type='text/html' href='http://pylab.blogspot.com/2007/01/python-sets.html' title='Python Sets'/><author><name>NextGenSearch</name><email>noreply@blogger.com</email><gd:image rel='http://schemas.google.com/g/2005#thumbnail' width='16' height='16' 
src='http://img2.blogblog.com/img/b16-rounded.gif'/></author><thr:total>0</thr:total></entry><entry><id>tag:blogger.com,1999:blog-33306823.post-4597212736629470477</id><published>2007-01-20T12:40:00.000-08:00</published><updated>2007-01-20T12:41:11.288-08:00</updated><title type='text'>plpgsql function to move from usercomments to comments</title><content type='html'>-- Function: usercomments_to_comments()&lt;br /&gt;&lt;br /&gt;-- DROP FUNCTION usercomments_to_comments();&lt;br /&gt;&lt;br /&gt;CREATE OR REPLACE FUNCTION usercomments_to_comments()&lt;br /&gt;  RETURNS int4 AS&lt;br /&gt;$BODY$&lt;br /&gt;declare&lt;br /&gt; c usercomments%rowtype;&lt;br /&gt; _count integer;&lt;br /&gt;begin&lt;br /&gt; _count := 0;&lt;br /&gt; for c in&lt;br /&gt;  select * from usercomments&lt;br /&gt; loop&lt;br /&gt;  insert into comments (comment, postid, created, ip, email, user_id, username, type_id) values (c.comment, c.user_id, c.created, c.ip, c.friend_email,c.friend_id,c.friend_name,2);&lt;br /&gt;  _count := _count + 1;&lt;br /&gt; end loop;&lt;br /&gt;&lt;br /&gt;&lt;br /&gt; return _count;&lt;br /&gt;end;&lt;br /&gt;$BODY$&lt;br /&gt;  LANGUAGE 'plpgsql' VOLATILE;&lt;br /&gt;ALTER FUNCTION usercomments_to_comments() OWNER TO postgres;&lt;div class="blogger-post-footer"&gt;&lt;img width='1' height='1' src='https://blogger.googleusercontent.com/tracker/33306823-4597212736629470477?l=pylab.blogspot.com' alt='' /&gt;&lt;/div&gt;</content><link rel='replies' type='application/atom+xml' href='http://pylab.blogspot.com/feeds/4597212736629470477/comments/default' title='Post Comments'/><link rel='replies' type='text/html' href='http://www.blogger.com/comment.g?blogID=33306823&amp;postID=4597212736629470477' title='0 Comments'/><link rel='edit' type='application/atom+xml' href='http://www.blogger.com/feeds/33306823/posts/default/4597212736629470477'/><link rel='self' type='application/atom+xml' 
href='http://www.blogger.com/feeds/33306823/posts/default/4597212736629470477'/><link rel='alternate' type='text/html' href='http://pylab.blogspot.com/2007/01/plpgsql-function-to-move-from.html' title='plpgsql function to move from usercomments to comments'/><author><name>NextGenSearch</name><email>noreply@blogger.com</email><gd:image rel='http://schemas.google.com/g/2005#thumbnail' width='16' height='16' src='http://img2.blogblog.com/img/b16-rounded.gif'/></author><thr:total>0</thr:total></entry><entry><id>tag:blogger.com,1999:blog-33306823.post-7675614700854032358</id><published>2007-01-19T11:16:00.000-08:00</published><updated>2007-01-19T11:19:13.002-08:00</updated><title type='text'>sql threaded comment view result set</title><content type='html'>&lt;!--/noindex--&gt; &lt;span style="font-family:Arial, Helvetica, sans-serif;"&gt; &lt;!--X-Body-Begin--&gt; &lt;!--X-User-Header--&gt; &lt;!--X-User-Header-End--&gt; &lt;!--X-TopPNI--&gt; &lt;/span&gt;&lt;p&gt;  &lt;!--X-TopPNI-End--&gt; &lt;!--X-MsgBody--&gt; &lt;!--X-Subject-Header-Begin--&gt; &lt;!--noindex--&gt; &lt;/p&gt;&lt;h2&gt;&lt;span style="font-family:Arial, Helvetica, sans-serif;"&gt;Re: Can SQL return a threaded-comment-view result set?&lt;/span&gt;&lt;/h2&gt; &lt;hr noshade="noshade" size="3" width="40%"&gt; &lt;!--/noindex--&gt;&lt;!--X-Subject-Header-End--&gt;&lt;!--X-Head-of-Message--&gt;   &lt;!--noindex--&gt; &lt;ul&gt;&lt;span style="font-family:Arial, Helvetica, sans-serif;"&gt;&lt;li&gt;&lt;strong&gt;From&lt;/strong&gt;: &lt;strong&gt;&lt;a href="mailto:mvppetlab@DOMAIN.HIDDEN"&gt;mvppetlab ( at ) yahoo ( dot ) com&lt;/a&gt; (Chris)&lt;/strong&gt;&lt;/li&gt; &lt;li&gt;&lt;strong&gt;To&lt;/strong&gt;: &lt;strong&gt;"&lt;a href="mailto:pgsql-general@DOMAIN.HIDDEN"&gt;pgsql-general ( at ) postgresql ( dot ) org ( dot ) pgsql-novice&lt;/a&gt;"@postgresql.org&lt;/strong&gt;&lt;/li&gt; &lt;li&gt;&lt;strong&gt;Subject&lt;/strong&gt;: &lt;strong&gt;Re: Can SQL return a threaded-comment-view result 
set?&lt;/strong&gt;&lt;/li&gt; &lt;li&gt;Date: 9 Oct 2003 18:46:20 -0700&lt;/li&gt; &lt;/span&gt;&lt;/ul&gt; &lt;!--/noindex--&gt;&lt;!--X-Head-of-Message-End--&gt;  &lt;!--X-Head-Body-Sep-Begin--&gt; &lt;hr /&gt; &lt;!--X-Head-Body-Sep-End--&gt; &lt;!--X-Body-of-Message--&gt; &lt;pre&gt;&lt;span style="font-family:Arial, Helvetica, sans-serif;"&gt;Hello all,&lt;br /&gt;&lt;br /&gt;I've been meaning to get back to you all but I just haven't had time.&lt;br /&gt;Okay, I've got a little bit of time now so here goes....&lt;br /&gt;&lt;br /&gt;I received many useful answers from many of you including Tom Lane,&lt;br /&gt;Joe Conway, and Josh Berkus. Max Nachlinger in particular on October&lt;br /&gt;5th (which was my birthday) sent me a large amount of threaded&lt;br /&gt;discussion forum code of his own. (Nice birthday present, Max. Thank&lt;br /&gt;you.) I will be investigating his solution when I have more time since&lt;br /&gt;his is almost certainly more efficient than my own.&lt;br /&gt;&lt;br /&gt;My own solution is a 20-line PL/pgSQL function I put together after&lt;br /&gt;reading the 7.3 docs at postgresql.org. It requires no modifications&lt;br /&gt;to my original example table definitions other than that I decided to&lt;br /&gt;use a 0 value instead of a NULL value for the in_reply_to column when&lt;br /&gt;a message isn't a reply, because that way my plpgsql function doesn't&lt;br /&gt;have to treat NULL as a special case.&lt;br /&gt;&lt;br /&gt;In particular, my solution doesn't require a message to keep pointers&lt;br /&gt;to its children. If a message is a reply, it simply points to its&lt;br /&gt;parent's id via in_reply_to. You can add as many messages as you want&lt;br /&gt;with just single simple INSERT statements; you don't have to do any&lt;br /&gt;tree-refactoring or updating to the parent. 
The downside is that while&lt;br /&gt;insert speed couldn't be any better and inserting couldn't be any&lt;br /&gt;easier, building the threaded view seems rather algorithmically&lt;br /&gt;inefficient, and in almost all applications optimising for obtaining&lt;br /&gt;the threaded view rather than insert speed is more important. One&lt;br /&gt;probably couldn't base even a moderate-load application on this&lt;br /&gt;solution, but if one wanted to anyways I suppose an in-memory tree&lt;br /&gt;representation could be maintained which allows new messages to be&lt;br /&gt;linked into the in-memory tree efficiently as they're inserted into&lt;br /&gt;the database, and then whenever the application is shutdown and&lt;br /&gt;reloaded it could rebuild that in-memory representation on startup. Or&lt;br /&gt;something. And until you run out of memory.... (Also, simply caching&lt;br /&gt;the results of queries could be effective if you have many identical&lt;br /&gt;queries producing identical results [which my application does] so&lt;br /&gt;this solution might not work too bad for me.)&lt;br /&gt;&lt;br /&gt;For the sake of googlers and like novices reading this, I've adapted&lt;br /&gt;my PL/pgSQL function so that it works with the original example I&lt;br /&gt;posted. (My real code uses more fields, different types, and has some&lt;br /&gt;other subtle differences because there's more than one type of table&lt;br /&gt;to consider and there are foreign key constraints.) 
After loading the&lt;br /&gt;below code, evaluating&lt;br /&gt;&lt;br /&gt;select * from threadview(0, 0);&lt;br /&gt;&lt;br /&gt;builds a table like the one I wanted in my original posting.&lt;br /&gt;&lt;br /&gt;          ---Chris&lt;br /&gt;&lt;br /&gt;=================================================&lt;br /&gt;-- This code originally due to Chris Barry,&lt;br /&gt;&lt;a rel="nofollow" href="http://www.goodfig.org/feedback"&gt;http://www.goodfig.org/feedback&lt;/a&gt;&lt;br /&gt;-- It's hereby placed in the public domain. These public domain&lt;br /&gt;licenses&lt;br /&gt;-- usually have some sort of warning about no guarantee of fitness for&lt;br /&gt;a particular&lt;br /&gt;-- purpose, etc. Well, the below code is DEFINITELY not fit for any&lt;br /&gt;purpose! So,&lt;br /&gt;-- use it at your own peril. Caveat emptor.&lt;br /&gt;&lt;br /&gt;-- drop database discussion;&lt;br /&gt;create database discussion;&lt;br /&gt;\c discussion&lt;br /&gt;&lt;br /&gt;--  The path to plpgsql.so may need to be edited for your system.&lt;br /&gt;create function plpgsql_call_handler()&lt;br /&gt;        returns opaque as '/usr/local/pgsql/lib/plpgsql.so' language&lt;br /&gt;'c';&lt;br /&gt;&lt;br /&gt;create language 'plpgsql' handler plpgsql_call_handler&lt;br /&gt;                           lancompiler 'PL/pgSQL';&lt;br /&gt;&lt;br /&gt;create table messages (&lt;br /&gt;      message_id integer,&lt;br /&gt;      in_reply_to integer,&lt;br /&gt;      created date,&lt;br /&gt;      author varchar(20),&lt;br /&gt;      title varchar(30),&lt;br /&gt;      message varchar(256),&lt;br /&gt;      primary key (message_id)&lt;br /&gt;);&lt;br /&gt;&lt;br /&gt;-- A threadrow is the same thing as a row from the messages table&lt;br /&gt;-- except a nesting integer has been added so the client knows how&lt;br /&gt;-- much to indent the thread message. 
I'm not sure if there's a&lt;br /&gt;-- syntax that makes it unnecessary to duplicate the redundant&lt;br /&gt;-- information from the messages table (e.g inheritance).&lt;br /&gt;create type threadrow as (&lt;br /&gt;      message_id integer,&lt;br /&gt;      in_reply_to integer,&lt;br /&gt;      created date,&lt;br /&gt;      author varchar(20),&lt;br /&gt;      title varchar(30),&lt;br /&gt;      message varchar(256),&lt;br /&gt;      nesting integer&lt;br /&gt;);&lt;br /&gt;&lt;br /&gt;&lt;br /&gt;create or replace function threadview(int, int) returns setof&lt;br /&gt;threadrow as '&lt;br /&gt;declare&lt;br /&gt; p alias for $1; -- p is the parent&lt;br /&gt; i alias for $2; -- i is the indentation (nesting)&lt;br /&gt; c threadrow%rowtype;&lt;br /&gt; c2 threadrow%rowtype;&lt;br /&gt;begin&lt;br /&gt; for c in select *, 0 as nesting from messages&lt;br /&gt;                 where in_reply_to = p&lt;br /&gt;                        order by created asc&lt;br /&gt; loop&lt;br /&gt;        c.nesting = i;&lt;br /&gt;  return next c;&lt;br /&gt;  for c2 in select * from threadview(c.message_id, i+1) loop&lt;br /&gt;   return next c2;&lt;br /&gt;  end loop;&lt;br /&gt; end loop;&lt;br /&gt; return;&lt;br /&gt;end;&lt;br /&gt;' language 'plpgsql';&lt;br /&gt;&lt;br /&gt;&lt;br /&gt;-- Load the table with some example data:&lt;br /&gt;&lt;br /&gt;insert into messages values&lt;br /&gt;(1,    0, '2003-09-01', 'John', 'Favorite DB?',&lt;br /&gt;           'What is your favorite database?');&lt;br /&gt;insert into messages values&lt;br /&gt;(2,    0, '2003-09-02', 'Mike', 'New DB2 benchmarks',&lt;br /&gt;           'I just posted some new DB2 benchmarks.');&lt;br /&gt;insert into messages values&lt;br /&gt;(3,    1, '2003-09-03', 'Mike', 'Re: Favorite DB?',&lt;br /&gt;       'I\'d say DB2.');&lt;br /&gt;insert into messages values&lt;br /&gt;(4,    1, '2003-09-05', 'Dave', 'Re: Favorite DB?',&lt;br /&gt;           'I\'m an Oracle man myself.');&lt;br /&gt;insert into 
messages values&lt;br /&gt;(5,    3, '2003-09-07', 'John', 'Re: Favorite DB?',&lt;br /&gt;       'DB2? I thought you liked free databases?');&lt;br /&gt;&lt;br /&gt;&lt;/span&gt;&lt;/pre&gt;&lt;div class="blogger-post-footer"&gt;&lt;img width='1' height='1' src='https://blogger.googleusercontent.com/tracker/33306823-7675614700854032358?l=pylab.blogspot.com' alt='' /&gt;&lt;/div&gt;</content><link rel='replies' type='application/atom+xml' href='http://pylab.blogspot.com/feeds/7675614700854032358/comments/default' title='Post Comments'/><link rel='replies' type='text/html' href='http://www.blogger.com/comment.g?blogID=33306823&amp;postID=7675614700854032358' title='0 Comments'/><link rel='edit' type='application/atom+xml' href='http://www.blogger.com/feeds/33306823/posts/default/7675614700854032358'/><link rel='self' type='application/atom+xml' href='http://www.blogger.com/feeds/33306823/posts/default/7675614700854032358'/><link rel='alternate' type='text/html' href='http://pylab.blogspot.com/2007/01/sql-threaded-comment-view-result-set.html' title='sql threaded comment view result set'/><author><name>NextGenSearch</name><email>noreply@blogger.com</email><gd:image rel='http://schemas.google.com/g/2005#thumbnail' width='16' height='16' src='http://img2.blogblog.com/img/b16-rounded.gif'/></author><thr:total>0</thr:total></entry><entry><id>tag:blogger.com,1999:blog-33306823.post-2045866502072890802</id><published>2007-01-18T18:06:00.001-08:00</published><updated>2007-01-18T18:06:41.656-08:00</updated><category scheme='http://www.blogger.com/atom/ns#' term='svn'/><title type='text'>svn revert adds before commiting</title><content type='html'>svn revert --recursive static/*&lt;br /&gt; will revert all ur svn add&lt;div class="blogger-post-footer"&gt;&lt;img width='1' height='1' src='https://blogger.googleusercontent.com/tracker/33306823-2045866502072890802?l=pylab.blogspot.com' alt='' /&gt;&lt;/div&gt;</content><link rel='replies' type='application/atom+xml' 
href='http://pylab.blogspot.com/feeds/2045866502072890802/comments/default' title='Post Comments'/><link rel='replies' type='text/html' href='http://www.blogger.com/comment.g?blogID=33306823&amp;postID=2045866502072890802' title='0 Comments'/><link rel='edit' type='application/atom+xml' href='http://www.blogger.com/feeds/33306823/posts/default/2045866502072890802'/><link rel='self' type='application/atom+xml' href='http://www.blogger.com/feeds/33306823/posts/default/2045866502072890802'/><link rel='alternate' type='text/html' href='http://pylab.blogspot.com/2007/01/svn-revert-adds-before-commiting.html' title='svn revert adds before commiting'/><author><name>NextGenSearch</name><email>noreply@blogger.com</email><gd:image rel='http://schemas.google.com/g/2005#thumbnail' width='16' height='16' src='http://img2.blogblog.com/img/b16-rounded.gif'/></author><thr:total>0</thr:total></entry><entry><id>tag:blogger.com,1999:blog-33306823.post-2258881316749137937</id><published>2007-01-18T15:38:00.000-08:00</published><updated>2007-01-18T15:40:12.880-08:00</updated><category scheme='http://www.blogger.com/atom/ns#' term='postgres'/><title type='text'>(extract('epoch' from now()) - extract('epoch' from created)) &lt; 14400</title><content type='html'>have a column named created tat says when the row was inserted&lt;br /&gt;i want to choose say the columns inserted in the last 4 hrs&lt;br /&gt;10 hrs or 1 day&lt;br /&gt;i cudnt find out how to do it&lt;br /&gt;select * from polls where created .. then waht&lt;br /&gt;are the random database queries too slow&lt;br /&gt;http://www.teenwag.com/poll?n=253&lt;br /&gt;Travis&lt;br /&gt;select * from polls.... where... 
and extract('epoch' from created) &lt; 14400&lt;br /&gt;wat is epoch&lt;br /&gt;7:04:02 pm&lt;br /&gt;Travis&lt;br /&gt;where the epoch field is the time in seconds, since 1970&lt;br /&gt;and 14400 is how many seconds in 4 hours&lt;br /&gt;actually&lt;br /&gt;7:04:23 pm&lt;br /&gt;&lt;br /&gt;is there a field in minutes or hours&lt;br /&gt;7:04:39 pm&lt;br /&gt;Travis&lt;br /&gt;that needs to be  and (now() - extract('epoch' from created)) &lt; 14400&lt;br /&gt;there is a field to get the day, hour, minute,&lt;br /&gt;year, month, but those are absolute (==)&lt;br /&gt;if you want something that is 4 hours ago or newer, then you need to do&lt;br /&gt;interval arithmetic yourself by taking the current time&lt;br /&gt;crap that still aitn it&lt;br /&gt;and (extract('epoch' from now()) - extract('epoch' from created)) &lt; 14400&lt;br /&gt;so this takes the absolute time, in seconds now and subtracts the&lt;br /&gt;absolute time from when it was created&lt;br /&gt;which if smaller than 60*60*4, is under 4 hours&lt;br /&gt;7:07:41 pm&lt;br /&gt;&lt;br /&gt;wow&lt;br /&gt;kewl&lt;br /&gt;7:08:22 pm&lt;br /&gt;Travis&lt;br /&gt;there is the age(timestamp) function but that displays pretty printing&lt;br /&gt;formatting&lt;br /&gt;not very useful for a query&lt;br /&gt;http://developer.postgresql.org/pgdocs/postgres/functions-datetime.html&lt;div class="blogger-post-footer"&gt;&lt;img width='1' height='1' src='https://blogger.googleusercontent.com/tracker/33306823-2258881316749137937?l=pylab.blogspot.com' alt='' /&gt;&lt;/div&gt;</content><link rel='replies' type='application/atom+xml' href='http://pylab.blogspot.com/feeds/2258881316749137937/comments/default' title='Post Comments'/><link rel='replies' type='text/html' href='http://www.blogger.com/comment.g?blogID=33306823&amp;postID=2258881316749137937' title='0 Comments'/><link rel='edit' type='application/atom+xml' href='http://www.blogger.com/feeds/33306823/posts/default/2258881316749137937'/><link rel='self' 
type='application/atom+xml' href='http://www.blogger.com/feeds/33306823/posts/default/2258881316749137937'/><link rel='alternate' type='text/html' href='http://pylab.blogspot.com/2007/01/now-extractepoch-from-created-14400.html' title='(extract(&apos;epoch&apos; from now()) - extract(&apos;epoch&apos; from created)) &lt; 14400'/><author><name>NextGenSearch</name><email>noreply@blogger.com</email><gd:image rel='http://schemas.google.com/g/2005#thumbnail' width='16' height='16' src='http://img2.blogblog.com/img/b16-rounded.gif'/></author><thr:total>0</thr:total></entry><entry><id>tag:blogger.com,1999:blog-33306823.post-5227858531451195295</id><published>2007-01-17T03:30:00.000-08:00</published><updated>2007-01-17T03:31:18.049-08:00</updated><category scheme='http://www.blogger.com/atom/ns#' term='scaling'/><title type='text'>From Hot Concept to Hot Site in Eight Days</title><content type='html'>&lt;blockquote&gt;  &lt;h1&gt;   &lt;span style="font-family:Verdana,Arial,Helvetica;"&gt;     From Hot Concept to Hot Site in Eight Days   &lt;/span&gt; &lt;/h1&gt;  &lt;h3&gt;&lt;span style="font-family:arial,helvetica,san-serif;"&gt;By James Hong&lt;/span&gt;&lt;/h3&gt;    &lt;p&gt;AmIHotOrNot.com evolved from an idea into one of the biggest sites on the Web in less than two months. Such rapid growth meant the site had to scale quickly, especially in the first eight days. &lt;/p&gt;  &lt;p&gt;It all started the evening of October 3, 2000, when I was sitting in my living room sharing a few beers with my roommate, Jim Young, and my brother, Tony. Jim had just mentioned that he thought a girl we had met at a party was a perfect "10," when the idea suddenly came to me: "Wouldn't it be funny to have a Web site where you could rate random pictures of people from 1 to 10?"&lt;/p&gt;   &lt;p&gt;For lack of anything better to do, we kept talking about it. 
We built the site in our heads, arguing over what kind of functionality the site would have, designing the user interface, and deciding on the details. After three hours, we had a whiteboard with a Web-site layout drawn on it and a burning desire to build the site. &lt;/p&gt;   &lt;p&gt;We also had the time. I was unemployed and working on an online resource for Simple Object Access Protocol (SOAP) developers, called XMethods.net, with my brother. Jim was an electrical engineering grad student at the University of California at Berkeley. Because the concept was so simple, the site took only a couple of days to build.&lt;/p&gt;      &lt;h3&gt;&lt;span style="font-family:Arial,Helvetica,sans-serif;"&gt;Keep It Simple&lt;/span&gt;&lt;/h3&gt;   &lt;p&gt;After hours of arguing over the Web-site design, the whiteboard had only three pages on it: one page where people voted on the appearance of others; another where people submitted their own pictures; and a final page where people viewed their own rating. We thought that simple was better.&lt;/p&gt;  &lt;p&gt;The simplicity of the site's user interface contributed to its addictive nature. Ultimately, we built the site using Apache, PHP, and MySQL.&lt;/p&gt;   &lt;p&gt;Initially, we decided to create the site with only three CGI scripts. While that would have been easier, we eventually decided to make the site more scalable in case we wanted to make changes or additions. Instead, we used a state-machine architecture.&lt;/p&gt;   &lt;p&gt;Every page view on our site can be thought of as a certain state. For instance, when a user is looking at a picture, he or she is in the "Vote" state. After a user votes, our machine's initial step is to process the "Exit" tasks of the Vote state, such as tallying the vote in the database. &lt;/p&gt;   &lt;p&gt;Based on switching variables, the machine then determines what the next state should be. 
In the simplest case, the next state should be a return to the Vote state.&lt;/p&gt;   &lt;p&gt;The state machine's final task is to perform the "Enter" tasks of the next state. This can include selecting a suitable photo to show the user. The Enter task also renders the HTML for the user, which, in this case, would be a page with another photo on which to vote (see&lt;!--   document.write('&lt;a href="http://www.newarchitectmag.com/documents/s=4447/new1013636391/javascript:openStatusWindow(\'figures/1.htm\',600, 400);"&gt;'); //--&gt;  &lt;a href="http://www.newarchitectmag.com/documents/s=4447/new1013636391/1.htm"&gt;Figure 1&lt;/a&gt;).&lt;/p&gt;   &lt;p&gt;Using this structure for such a simple site might seem like overkill, but it has definitely paid off. Changing the site is extremely easy due to the fact that all of the various tasks are centralized, including the routing between states. &lt;/p&gt;   &lt;p&gt;Another advantage of using the state-machine architecture is that it forced us to create distinct interfaces between states. By load balancing across a farm of identical servers, each running an instance of the state machine, the architecture made it easy to scale the site by just adding more Linux machines.&lt;/p&gt;      &lt;h3&gt;&lt;span style="font-family:Arial,Helvetica,sans-serif;"&gt;The Deluge&lt;/span&gt;&lt;/h3&gt;   &lt;p&gt;Not long after building our site, we noticed that starting at around 10 a.m. Pacific Standard Time, our machines' performance began degrading to the point at which our servers were being shut down. Upon further inspection, we noticed that the rate at which &lt;code&gt;SYN&lt;/code&gt; connections were being made was overwhelming our servers. When one computer wants to initiate a connection with another, it does so by sending a SYN packet that basically says, "Hello, I want to talk to you, can you hear me?" 
Many Denial of Service (DoS) attacks involve flooding a Web site with &lt;code&gt;SYN&lt;/code&gt; packets, so we immediately thought we were under a DoS attack. &lt;/p&gt;  &lt;p&gt;Of course, this problem disappeared as soon as we solved the real problem—our system didn't have enough capacity. It turns out that we weren't under attack, but rather the demand for our Web site was more than our system could handle. &lt;/p&gt;   &lt;p&gt;Reaching a position in which we might add machines was in and of itself a problem. When we started the site and were immediately flooded with hits, we considered obtaining some colocation space and setting things up ourselves. Then we realized a few things:&lt;/p&gt;  &lt;ul&gt;&lt;li&gt;We didn't have the money to buy servers, firewalls, and load   balancers.  &lt;/li&gt;&lt;li&gt;Even if we had the money, it would take a long time to get them.   &lt;/li&gt;&lt;li&gt;We didn't have the experience to set these up and maintain them.    &lt;/li&gt;&lt;li&gt;We didn't have the resources to handle this side of things.   &lt;/li&gt;&lt;li&gt;Hosting Web servers wasn't our core competency. &lt;/li&gt;&lt;/ul&gt;   &lt;p&gt;At that point, we had never heard of managed hosting. We learned about it when searching online for potential Web-hosting services. With managed hosting, customers lease machines that are already racked, instead of renting space in a data center. The managed host guarantees the uptime, handles the server maintenance and monitoring, and sells bandwidth based on actual usage instead of pipe width. Even more importantly, the host has extra machines on hand and can add servers at a moment's notice. &lt;/p&gt;   &lt;p&gt;This option let us lease our machines without having to arrange for bank financing (no bank would have lent us money, anyway). With managed hosting, we could outsource our entire network operations department. 
Thus, this decision was a no-brainer.&lt;/p&gt;   &lt;p&gt;We chose to use Rackspace Managed Hosting because it was top ranked by a couple of informational Web sites we consulted. This ended up being a great choice. That first week, I called Rackspace nearly every night around 3 a.m. to request another server. Each time, the new machine would be up and running by the time I awoke the next morning. By the end of the week, we had gone from one Web server to seven. &lt;/p&gt;      &lt;h3&gt;&lt;span style="font-family:Arial,Helvetica,sans-serif;"&gt;Database Overload&lt;/span&gt;&lt;/h3&gt;   &lt;p&gt;Once we had all the machines we needed to handle the massive amount of HTTP requests we were receiving, the database started bottlenecking. Our system architecture consisted of seven Web servers running Linux, and a Sun E220 that stored our database. One thing we learned through testing was that the open-source tools performed significantly better on a single-processor 700-MHz Pentium III machine running Linux than they did on a quad-processor Sun machine. MySQL is probably optimized for Linux because the open-source community develops it.&lt;/p&gt;  &lt;p&gt;We found a way to help our database keep up with the traffic for our particular application. First, nearly every query made is a &lt;code&gt;SELECT&lt;/code&gt; call. Second, there's no reason why all votes must be counted in real time. Given these circumstances, we decided to replicate the active portion of the database on each Web server so that &lt;code&gt;SELECT&lt;/code&gt; calls could be made locally. We then started caching votes on each machine, and configured the master database (now a Linux box) to poll each server periodically to collect votes and maintain replication integrity. This method shifts much of the database load to the individual servers, significantly reducing the load on our primary database machine. 
If the primary database ever becomes overloaded, we can simply add two more server machines and another layer of caching, as illustrated in&lt;!--   document.write('&lt;a href="http://www.newarchitectmag.com/documents/s=4447/new1013636391/javascript:openStatusWindow(\'figures/2.htm\',600, 400);"&gt;'); //--&gt;  &lt;a href="http://www.newarchitectmag.com/documents/s=4447/new1013636391/2.htm"&gt;Figure 2&lt;/a&gt;. &lt;/p&gt;      &lt;h3&gt;&lt;span style="font-family:Arial,Helvetica,sans-serif;"&gt;Economics 101&lt;/span&gt;&lt;/h3&gt;   &lt;p&gt;Hosting our users' pictures—something we had originally done on our own—was another scaling issue we faced. This was such a big issue—due to the costs involved—that we almost decided to shut the site down. On its second night of operation, shortly after a Salon.com article about our site was published, we were forced to take the site down at 10 p.m. &lt;/p&gt;  &lt;p&gt;We had already been operating under an incredible load for the two hours following 8 p.m., when the article went up. I estimate that we served more than 3GB worth of pictures in those two hours, and the number was accelerating. Because we weren't generating any revenue, it was clear that the economics of this plan just didn't scale. Not only did serving pictures incur bandwidth charges, but it also bottlenecked our CPUs.&lt;/p&gt;   &lt;p&gt;After stressing out for a couple more hours, I remembered that Yahoo Geocities gives its users FTP access, meaning that we could quickly upload the pictures to a Geocities account. As soon as I realized that we didn't have to host photos ourselves, I called Jim. As an interim measure, we sent new users to Geocities to set up their own accounts and we let them submit the URLs for their pictures, instead of the pictures themselves.&lt;/p&gt;   &lt;p&gt;As Jim began working on the solution, it occurred to me that some companies might actually want to host people's photos and pay us a bounty for sending them users. 
By directing our users to these companies, we turned one of our major costs into a revenue stream.&lt;/p&gt;     &lt;h3&gt;&lt;span style="font-family:Arial,Helvetica,sans-serif;"&gt;Scaling the Human Element&lt;/span&gt;&lt;/h3&gt;   &lt;p&gt;We had another problem with some users submitting pornography and other inappropriate photos. Initially, we decided to solve this by adding a link under each photo that said, "Click here if this picture is inappropriate." If a photo received enough clicks, based on a formula we had derived, the picture was removed. &lt;/p&gt;  &lt;p&gt;This worked pretty well, but not well enough. I sent the chairman of a large advertising network a link to our site with a note proclaiming that: "The odds of getting an inappropriate picture are extremely low." Ten minutes later, I received his reply: "Unfortunately, the first picture I saw was that of a topless woman." &lt;/p&gt;   &lt;p&gt;He informed us that if we wanted companies to advertise on our site, we'd have to filter each picture as it came in. Jim built an interface for us to do so. However, we soon realized that we couldn't spend all day screening pictures. The system's human component wasn't scalable. That's when we arrived at the moderator idea.&lt;/p&gt;   &lt;p&gt;We decided to build a system in which moderators could vote on whether to approve or reject a picture before it passed on to the main site. If a picture got enough votes either way, it was approved or rejected. By making the decision collective, no single moderator could approve or reject a picture independently. &lt;/p&gt;   &lt;p&gt;To help detect any rogue moderators, the system tracks each moderator's accuracy. A vote is counted as wrong when the moderator's vote goes against the final outcome of the picture. For instance, if one person votes to approve, but all others vote to reject, the one person is wrong. 
Moderators whose accuracy ratings drop below our threshold are kicked out.&lt;/p&gt;   &lt;p&gt;We decided to take the moderator system one step further by adding security levels. The higher a moderator's security level, the more his or her votes counted. We also gave higher-ranking moderators special privileges, like an expert mode in which they could judge pictures much faster. We gave the highest ranking moderators the ability to reject or accept moderator applications, and the ability to kick out rogue moderators. Today, these top-level moderators essentially run the moderator section of the site and decide on the specific guidelines for what makes a photo inappropriate. More than 1000 moderators are currently active, and they form our strongest community.&lt;/p&gt;      &lt;h3&gt;&lt;span style="font-family:Arial,Helvetica,sans-serif;"&gt;A Full Night's Sleep&lt;/span&gt;&lt;/h3&gt;   &lt;p&gt;I got about 15 hours of sleep over AmIHotorNot.com's first eight days—the time during which we addressed most of our scalability issues. Eight days after launching, we broke the one million page view barrier, reaching more than 1.8 million page views that day. By the end of November, we made NetNielsen's list of the top 25 advertising domains.&lt;/p&gt;  &lt;p&gt;The site now runs smoothly, and has handled as many as 14.8 million page views in a single day without even yawning. Looking back, I think that week of scaling easily wins distinction as the most stressful, most exhausting, most rewarding week I've ever had in my life. In this trial by fire, we certainly learned an incredible amount about building and scaling a Web application. &lt;/p&gt;   &lt;hr size="1"&gt;&lt;p&gt; James is a cofounder of Eight Days, which runs the amihotornot.com Web site. 
&lt;i&gt;[Editor's Note: Since publication the URL has been changed to &lt;a href="http://www.hotornot.com/" target="_new"&gt;www.hotornot.com&lt;/a&gt;.]&lt;/i&gt; He has a Bachelors Degree in electrical engineering and computer science, and an MBA, both from U.C. Berkeley.&lt;/p&gt;     &lt;/blockquote&gt;&lt;div class="blogger-post-footer"&gt;&lt;img width='1' height='1' src='https://blogger.googleusercontent.com/tracker/33306823-5227858531451195295?l=pylab.blogspot.com' alt='' /&gt;&lt;/div&gt;</content><link rel='replies' type='application/atom+xml' href='http://pylab.blogspot.com/feeds/5227858531451195295/comments/default' title='Post Comments'/><link rel='replies' type='text/html' href='http://www.blogger.com/comment.g?blogID=33306823&amp;postID=5227858531451195295' title='0 Comments'/><link rel='edit' type='application/atom+xml' href='http://www.blogger.com/feeds/33306823/posts/default/5227858531451195295'/><link rel='self' type='application/atom+xml' href='http://www.blogger.com/feeds/33306823/posts/default/5227858531451195295'/><link rel='alternate' type='text/html' href='http://pylab.blogspot.com/2007/01/from-hot-concept-to-hot-site-in-eight.html' title='From Hot Concept to Hot Site in Eight Days'/><author><name>NextGenSearch</name><email>noreply@blogger.com</email><gd:image rel='http://schemas.google.com/g/2005#thumbnail' width='16' height='16' src='http://img2.blogblog.com/img/b16-rounded.gif'/></author><thr:total>0</thr:total></entry><entry><id>tag:blogger.com,1999:blog-33306823.post-8508852025527853905</id><published>2007-01-16T23:26:00.000-08:00</published><updated>2007-01-16T23:31:08.757-08:00</updated><category scheme='http://www.blogger.com/atom/ns#' term='scaling'/><title type='text'>Inside MySpace.com</title><content type='html'>&lt;div class="print_article"&gt;        &lt;div class="print_article_header"&gt;     &lt;span class="print_article_title"&gt;Inside MySpace.com&lt;/span&gt;&lt;br /&gt;                      &lt;span 
class="print_article_date"&gt;January 16, 2007&lt;/span&gt;&lt;br /&gt;         &lt;/div&gt;           &lt;div class="print_article_byline"&gt;              By       &lt;a href="http://www.baselinemag.com/author_bio/0,1541,a=4895,00.asp" class="print_article_byline"&gt;David F. Carr&lt;/a&gt;        &lt;/div&gt;         &lt;span name="intelliTxt" id="intelliTXT"&gt;&lt;p nd="1"&gt; Booming traffic demands put a constant stress on the social network's computing infrastructure. Yet, MySpace developers have repeatedly redesigned the Web site software, database and &lt;a itxtdid="3384110" target="_blank" href="http://www.baselinemag.com/print_article2/0,1217,a=198614,00.asp#" style="border-bottom: 0.075em solid darkgreen; font-weight: normal; font-size: 100%; text-decoration: underline; color: darkgreen; background-color: transparent; padding-bottom: 1px;" class="iAs"&gt;storage systems&lt;/a&gt; in an attempt to keep pace with exploding growth - the site now handles almost 40 billion page views a month. 
Most corporate Web sites will never have to bear more than a small fraction of the traffic MySpace handles, but anyone seeking to reach the mass market online can learn from its experience.&lt;/p&gt;    &lt;p&gt;&lt;b&gt;Story Guide:  &lt;/b&gt;&lt;/p&gt;&lt;li&gt;&lt;p&gt;&lt;!-- Vignette V6 Tue Jan 16 10:29:54 2007 --&gt; &lt;!--WEB 4--&gt;  &lt;!-- RELATED LINKS --&gt; &lt;/p&gt;&lt;p&gt;&lt;ziffarticle id="198614" page="2"&gt;&lt;b&gt; A Member Rants: "Fix the God Damn Inbox!"&lt;/b&gt;&lt;/ziffarticle&gt;&lt;b&gt;&lt;br /&gt;&lt;/b&gt;&lt;/p&gt;&lt;/li&gt;&lt;li&gt;&lt;ziffarticle id="198614" page="3"&gt;&lt;b&gt;The Journey Begins&lt;/b&gt;&lt;/ziffarticle&gt;&lt;b&gt;&lt;br /&gt; &lt;/b&gt;&lt;p&gt;&lt;b&gt;Membership Milestones:&lt;br /&gt;&lt;/b&gt;&lt;/p&gt;&lt;/li&gt;&lt;li&gt;&lt;ziffarticle id="198614" page="4"&gt;&lt;b&gt;500,000 Users: A Simple Architecture Stumbles &lt;/b&gt;&lt;/ziffarticle&gt;&lt;b&gt;&lt;br /&gt;&lt;/b&gt;&lt;/li&gt;&lt;li&gt;&lt;ziffarticle id="198614" page="5"&gt;&lt;b&gt;1 Million Users: Vertical Partitioning Solves Scalability Woes &lt;/b&gt;&lt;/ziffarticle&gt;&lt;b&gt;&lt;br /&gt; &lt;/b&gt;&lt;/li&gt;&lt;li&gt;&lt;ziffarticle id="198614" page="6"&gt;&lt;b&gt;3 Million Users: Scale-Out Wins Over Scale-Up &lt;/b&gt;&lt;/ziffarticle&gt;&lt;b&gt;&lt;br /&gt; &lt;/b&gt;&lt;/li&gt;&lt;li&gt;&lt;ziffarticle id="198614" page="7"&gt;&lt;b&gt;9 Million Users: Site Migrates to ASP.NET, Adds Virtual Storage &lt;/b&gt;&lt;/ziffarticle&gt;&lt;b&gt;&lt;br /&gt; &lt;/b&gt;&lt;/li&gt;&lt;li&gt;&lt;ziffarticle id="198614" page="8"&gt;&lt;b&gt;26 Million Users: MySpace Embraces 64-Bit Technology &lt;/b&gt;&lt;/ziffarticle&gt;&lt;b&gt;&lt;br /&gt; &lt;/b&gt;&lt;/li&gt;&lt;li&gt;&lt;ziffarticle id="198614" page="9"&gt;&lt;b&gt; What's Behind Those "Unexpected Error" Screens?&lt;/b&gt;&lt;/ziffarticle&gt;&lt;b&gt;&lt;br /&gt;   &lt;/b&gt;&lt;p&gt;&lt;b&gt;Also in This Feature:&lt;br /&gt; &lt;/b&gt;&lt;/p&gt;&lt;/li&gt;&lt;li&gt;&lt;ziffarticle 
id="198618"&gt;&lt;b&gt; The Company's Top Players and Alumni &lt;/b&gt;&lt;/ziffarticle&gt;&lt;b&gt;&lt;br /&gt;&lt;/b&gt;&lt;/li&gt;&lt;li&gt;&lt;ziffarticle id="198619"&gt;&lt;b&gt; Technologies To Handle Mushrooming Demand&lt;/b&gt;&lt;/ziffarticle&gt;&lt;b&gt;&lt;br /&gt;&lt;/b&gt;&lt;/li&gt;&lt;li&gt;&lt;ziffarticle id="198616"&gt;&lt;b&gt; Web Design Experts Grade MySpace&lt;/b&gt;&lt;/ziffarticle&gt;&lt;b&gt;&lt;br /&gt;&lt;/b&gt;&lt;/li&gt;&lt;li&gt;&lt;ziffarticle id="198615"&gt;&lt;b&gt; User Customization: Too Much of a Good Thing?&lt;/b&gt;&lt;/ziffarticle&gt;&lt;b&gt;&lt;br /&gt;  &lt;/b&gt;&lt;p nd="2"&gt;&lt;b&gt;Reader Question:&lt;/b&gt; Is MySpace the future of corporate communications? Write to: &lt;a href="mailto:baseline@ziffdavis.com"&gt; baseline@ziffdavis.com &lt;/a&gt;   &lt;/p&gt; &lt;ziffplacead&gt;  &lt;/ziffplacead&gt;&lt;p&gt;&lt;ziffarticle id="198614" page="2"&gt;&lt;b&gt; Next page: A Member Rants: "Fix the God Damn Inbox!"&lt;/b&gt;&lt;/ziffarticle&gt;&lt;br /&gt;&lt;ziffpage title='A Member Rants: "Fix the God Damn Inbox!"'&gt;  &lt;img src="http://www.baselinemag.com/print_article2/images/bl_spacer.gif" width="500" height="1" /&gt; &lt;/ziffpage&gt;&lt;/p&gt;&lt;p&gt;&lt;b&gt;A Member Rants: "Fix the God Damn Inbox!"&lt;/b&gt;&lt;/p&gt; &lt;p nd="3"&gt;On his MySpace profile page, Drew, a 17-year-old from Dallas, is bare-chested, in a photo that looks like he might have taken it of himself, with the camera held at arm's length. His "friends list" is weighted toward pretty girls and fast cars, and you can read that he runs on the school track team, plays guitar and drives a blue Ford Mustang.&lt;/p&gt; &lt;p nd="4"&gt;But when he turns up in the forum where users vent their frustrations, he's annoyed. "FIX THE GOD DAMN INBOX!" he writes, "shouting" in all caps. 
Drew is upset because the private messaging system for MySpace members will let him send notes and see new ones coming in, but when he tries to open a message, the Web site displays what he calls "the typical sorry ... blah blah blah [error] message."&lt;/p&gt; &lt;p nd="5"&gt;For MySpace, the good news is that Drew cares so much about access to this online meeting place, as do the owners of 140 million other MySpace accounts. That's what has made MySpace one of the world's most trafficked Web sites. &lt;/p&gt; &lt;p nd="6"&gt;In November, MySpace, for the first time, surpassed even Yahoo in the number of Web pages visited by U.S. Internet users, according to comScore Media Metrix, which recorded 38.7 billion page views for MySpace as opposed to 38.05 billion for Yahoo.&lt;/p&gt; &lt;p nd="7"&gt;The bad news is that MySpace reached this point so fast, just three years after its official launch in November 2003, that it has been forced to address problems of extreme scalability that only a few other organizations have had to tackle.&lt;/p&gt; &lt;p nd="8"&gt;The result has been periodic overloads on MySpace's Web servers and database, with MySpace users frequently seeing a Web page headlined "Unexpected Error" and other pages that apologize for various functions of the Web site being offline for maintenance. And that's why Drew and other MySpace members who can't send or view messages, update their profiles or perform other routine tasks pepper MySpace forums with complaints.&lt;/p&gt; &lt;ziffplacead&gt; &lt;/ziffplacead&gt;&lt;p nd="9"&gt;These days, MySpace seems to be perpetually overloaded, according to Shawn White, director of outside operations for the Keynote Systems performance monitoring service. "It's not uncommon, on any particular day, to see 20% errors logging into the MySpace site, and we've seen it as high as 30% or even 40% from some locations," he says. 
"Compare that to what you would expect from Yahoo or Salesforce.com, or other sites that are used for commercial purposes, and it would be unacceptable." On an average day, he sees something more like a 1% error rate from other major Web sites.&lt;/p&gt; &lt;p nd="10"&gt;In addition, MySpace suffered a 12-hour outage, starting the night of July 24, 2006, during which the only live Web page was an apology about problems at the main data center in Los Angeles, accompanied by a Flash-based Pac-Man game for users to play while they waited for service to be restored. (Interestingly, during the outage, traffic to the MySpace Web site went up, not down, says Bill Tancer, general manager of research for Web site tracking service Hitwise: "That's a measure of how addicted people are—that all these people were banging on the domain, trying to get in.")&lt;/p&gt; &lt;p nd="11"&gt;Jakob Nielsen, the former Sun Microsystems engineer who has become famous for his Web site critiques as a principal of the Nielsen Norman Group consultancy, says it's clear that MySpace wasn't created with the kind of systematic approach to computer engineering that went into Yahoo, eBay or Google. Like many other observers, he believes MySpace was surprised by its own growth. "I don't think that they have to reinvent all of computer science to do what they're doing, but it is a large-scale computer science problem," he says.&lt;/p&gt; &lt;p nd="12"&gt;MySpace developers have repeatedly redesigned the Web site's software, database and &lt;a itxtdid="3384108" target="_blank" href="http://www.baselinemag.com/print_article2/0,1217,a=198614,00.asp#" style="border-bottom: 0.075em solid darkgreen; font-weight: normal; font-size: 100%; text-decoration: underline; color: darkgreen; background-color: transparent; padding-bottom: 1px;" class="iAs"&gt;storage&lt;/a&gt; systems to try to keep pace with exploding growth, but the job is never done. 
"It's kind of like painting the Golden Gate Bridge, where every time you finish, it's time to start over again," says Jim Benedetto, MySpace's vice president of technology.&lt;/p&gt; &lt;p nd="13"&gt;So, why study MySpace's technology? Because it has, in fact, overcome multiple systems scalability challenges just to get to this point.&lt;/p&gt; &lt;p nd="14"&gt;Benedetto says there were many lessons his team had to learn, and is still learning, the hard way. Improvements they are currently working on include a more flexible data caching system and a geographically distributed architecture that will protect against the kind of outage MySpace experienced in July.&lt;/p&gt; &lt;p nd="15"&gt;Most corporate Web sites will never have to bear more than a small fraction of the traffic MySpace handles, but anyone seeking to reach the mass market online can learn from its example.&lt;/p&gt;  &lt;p&gt;&lt;ziffarticle id="198614" page="3"&gt;&lt;b&gt;Next page: The Journey Begins &lt;/b&gt;&lt;/ziffarticle&gt;  &lt;ziffpage title="The Journey Begins"&gt;  &lt;img src="http://www.baselinemag.com/print_article2/images/bl_spacer.gif" width="500" height="1" /&gt; &lt;/ziffpage&gt;&lt;/p&gt;&lt;p&gt;&lt;b&gt;The Journey Begins&lt;/b&gt;&lt;/p&gt; &lt;p nd="16"&gt;MySpace may be struggling with scalability issues today, but its leaders started out with a keen appreciation for the importance of Web site performance.&lt;/p&gt; &lt;p nd="17"&gt;The Web site was launched a little more than three years ago by an Internet marketing company called Intermix Media (also known, in an earlier incarnation, as eUniverse), which ran an assortment of e-mail marketing and Web businesses. MySpace founders Chris DeWolfe and Tom Anderson had previously founded an e-mail marketing company called ResponseBase that they sold to Intermix in 2002. The ResponseBase team received $2 million plus a profit-sharing deal, according to a Web site operated by former Intermix CEO Brad Greenspan. 
(Intermix was an aggressive Internet marketer—maybe too aggressive. In 2005, then New York Attorney General Eliot Spitzer—now the state's governor—won a $7.9 million settlement in a lawsuit charging Intermix with using adware. The company admitted no wrongdoing.)&lt;/p&gt; &lt;p nd="18"&gt;In 2003, Congress passed the CAN-SPAM Act to control the use of unsolicited e-mail marketing. Intermix's leaders, including DeWolfe and Anderson, saw that the new laws would make the e-mail marketing business more difficult and "were looking to get into a new line of business," says Duc Chau, a software developer who was hired by Intermix to rewrite the firm's e-mail marketing software.&lt;/p&gt; &lt;p nd="19"&gt;At the time, Anderson and DeWolfe were also members of Friendster, an earlier entrant in the category MySpace now dominates, and they decided to create their own social networking site. Their version omitted many of the restrictions Friendster placed on how users could express themselves, and they also put a bigger emphasis on music and allowing bands to promote themselves online. Chau developed the initial version of the MySpace Web site in Perl, running on the Apache Web server, with a MySQL database back end. That didn't make it past the test phase, however, because other Intermix developers had more experience with ColdFusion, the Web application environment originally developed by Allaire and now owned by Adobe. So, the production Web site went live on ColdFusion, running on Windows, and Microsoft SQL Server as the database.&lt;/p&gt; &lt;ziffplacead&gt; &lt;/ziffplacead&gt;&lt;p nd="20"&gt;Chau left the company about then, leaving further Web development to others, including Aber Whitcomb, an Intermix technologist who is now MySpace's chief technology officer, and Benedetto, who joined about a month after MySpace went live.&lt;/p&gt; &lt;p nd="21"&gt;MySpace was launched in 2003, just as Friendster started having trouble keeping pace with its own runaway growth. 
In a recent interview with Fortune magazine, Friendster president Kent Lindstrom admitted his service stumbled at just the wrong time, taking 20 to 30 seconds to deliver a page when MySpace was doing it in 2 or 3 seconds.&lt;/p&gt; &lt;p nd="22"&gt;As a result, Friendster users began to defect to MySpace, which they saw as more dependable. &lt;/p&gt; &lt;p nd="23"&gt;Today, MySpace is the clear "social networking" king. Social networking refers to Web sites organized to help users stay connected with each other and meet new people, either through introductions or searches based on common interests or school affiliations. Other prominent sites in this category include Facebook, which originally targeted university students; and LinkedIn, a professional networking site, as well as Friendster. MySpace prefers to call itself a "next generation portal," emphasizing a breadth of content that includes music, comedy and videos. It operates like a virtual nightclub, with a juice bar for under-age visitors off to the side, a meat-market dating scene front and center, and marketers in search of the youth sector increasingly crashing the party.&lt;/p&gt; &lt;p nd="24"&gt;Users register by providing basic information about themselves, typically including age and hometown, their sexual preference and their marital status. Some of these options are disabled for minors, although MySpace continues to struggle with a reputation as a stomping ground for sexual predators.&lt;/p&gt; &lt;p nd="25"&gt;MySpace profile pages offer many avenues for self-expression, ranging from the text in the About Me section of the page to the song choices loaded into the MySpace music player, video choices, and the ranking assigned to favorite friends. 
MySpace also gained fame for allowing users a great deal of freedom to customize their pages with Cascading Style Sheets (CSS), a Web standard formatting language that makes it possible to change the fonts, colors and background images associated with any element of the page. The results can be hideous—pages so wild and discolored that they are impossible to read or navigate—or they can be stunning, sometimes employing professionally designed templates (see "Too Much of a Good Thing?" p. 48).&lt;/p&gt; &lt;p nd="26"&gt;The "network effect," in which the mass of users inviting other users to join MySpace led to exponential growth, began about eight months after the launch "and never really stopped," Chau says.&lt;/p&gt; &lt;p nd="27"&gt;News Corp., the media empire that includes the Fox television networks and 20th Century Fox movie studio, saw this rapid growth as a way to multiply its share of the audience of Internet users, and bought MySpace in 2005 for $580 million. Now, News Corp. chairman Rupert Murdoch apparently thinks MySpace should be valued like a major Web portal, recently telling a group of investors he could get $6 billion—more than 10 times the price he paid in 2005—if he turned around and sold it today. That's a bold claim, considering the Web site's total revenue was an estimated $200 million in the fiscal year ended June 2006. News Corp. says it expects Fox Interactive as a whole to have revenue of $500 million in 2007, with about $400 million coming from MySpace.&lt;/p&gt; &lt;ziffplacead&gt; &lt;/ziffplacead&gt;&lt;p nd="28"&gt;But MySpace continues to grow. In December, it had 140 million member accounts, compared with 40 million in November 2005. 
Granted, that doesn't quite equate to the number of individual users, since one person can have multiple accounts, and a profile can also represent a band, a fictional character like Borat, or a brand icon like the Burger King.&lt;/p&gt; &lt;p nd="29"&gt;Still, MySpace has tens of millions of people posting messages and comments or tweaking their profiles on a regular basis—some of them visiting repeatedly throughout the day. That makes the technical requirements for supporting MySpace much different than, say, for a news Web site, where most content is created by a relatively small team of editors and passively consumed by Web site visitors. In that case, the content management database can be optimized for read-only requests, since additions and updates to the database content are relatively rare. A news site might allow reader comments, but on MySpace user-contributed content is the primary content. As a result, it has a higher percentage of database interactions that are recording or updating information rather than just retrieving it.&lt;/p&gt; &lt;p nd="30"&gt;Every profile page view on MySpace has to be created dynamically—that is, stitched together from database lookups. In fact, because each profile page includes links to those of the user's friends, the Web site software has to pull together information from multiple tables in multiple databases on multiple servers. The database workload can be mitigated somewhat by caching data in memory, but this scheme has to account for constant changes to the underlying data.&lt;/p&gt; &lt;p nd="31"&gt;The Web site architecture went through five major revisions—each coming after MySpace had reached certain user account milestones—and dozens of smaller tweaks, Benedetto says. 
"We didn't just come up with it; we redesigned, and redesigned, and redesigned until we got where we are today," he points out.&lt;/p&gt; &lt;p nd="32"&gt;Although MySpace declined formal interview requests, Benedetto answered Baseline's questions during an appearance in November at the SQL Server Connections conference in Las Vegas. Some of the technical information in this story also came from a similar "mega-sites" presentation that Benedetto and his boss, chief technology officer Whitcomb, gave at Microsoft's MIX Web developer conference in March.&lt;/p&gt; &lt;p nd="33"&gt;As they tell it, many of the big Web architecture changes at MySpace occurred in 2004 and early 2005, as the number of member accounts skyrocketed into the hundreds of thousands and then millions. &lt;/p&gt; &lt;p nd="34"&gt;At each milestone, the Web site would exceed the maximum capacity of some component of the underlying system, often at the database or storage level. Then, features would break, and users would scream. Each time, the technology team would have to revise its strategy for supporting the Web site's workload.&lt;/p&gt; &lt;p nd="35"&gt;And although the systems architecture has been relatively stable since the Web site crossed the 7 million account mark in early 2005, MySpace continues to knock up against limits such as the number of simultaneous connections supported by SQL Server, Benedetto says: "We've maxed out pretty much everything."&lt;/p&gt;  &lt;p&gt;&lt;ziffarticle id="198614" page="4"&gt;&lt;b&gt;Next page: First Milestone: 500,000 Accounts&lt;/b&gt;&lt;/ziffarticle&gt;  &lt;ziffpage title="First Milestone: 500,000 Accounts"&gt;  &lt;img src="http://www.baselinemag.com/print_article2/images/bl_spacer.gif" width="500" height="1" /&gt; &lt;/ziffpage&gt;&lt;/p&gt;&lt;p&gt;&lt;b&gt;First Milestone: 500,000 Accounts&lt;/b&gt;&lt;/p&gt; &lt;p nd="36"&gt;MySpace started small, with two Web servers talking to a single database server. 
Originally, they were 2-processor Dell servers loaded with 4 gigabytes of memory, according to Benedetto.&lt;/p&gt; &lt;p nd="37"&gt;Web sites are better off with such a simple architecture—if they can get away with it, Benedetto says. "If you can do this, I highly recommend it because it's very, very non-complex," he says. "It works great for small to medium-size Web sites."&lt;/p&gt; &lt;p nd="38"&gt;The single database meant that everything was in one place, and the dual Web servers shared the workload of responding to user requests. But like several subsequent revisions to MySpace's underlying systems, that three-server arrangement eventually buckled under the weight of new users. For a while, MySpace absorbed user growth by throwing hardware at the problem—simply buying more Web servers to handle the expanding volume of user requests.&lt;/p&gt; &lt;p nd="39"&gt;But at 500,000 accounts, which MySpace reached in early 2004, the workload became too much for a single database.&lt;/p&gt; &lt;p nd="40"&gt;Adding databases isn't as simple as adding Web servers. When a single Web site is supported by multiple databases, its designers must decide how to subdivide the database workload while maintaining the same consistency as if all the data were stored in one place.&lt;/p&gt; &lt;p nd="41"&gt;In the second-generation architecture, MySpace ran on three SQL Server databases—one designated as the master copy to which all new data would be posted and then replicated to the other two, which would concentrate on retrieving data to be displayed on blog and profile pages. 
This also worked well—for a while—with the addition of more database servers and bigger hard disks to keep up with the continued growth in member accounts and the volume of data being posted.&lt;/p&gt; &lt;ziffplacead&gt; &lt;/ziffplacead&gt;&lt;p&gt;&lt;ziffarticle id="198614" page="5"&gt;&lt;b&gt;Next page: Second Milestone: 1-2 Million Accounts&lt;/b&gt;&lt;/ziffarticle&gt;  &lt;ziffpage title="Second Milestone: 1-2 Million Accounts"&gt;  &lt;img src="http://www.baselinemag.com/print_article2/images/bl_spacer.gif" width="500" height="1" /&gt; &lt;/ziffpage&gt;&lt;/p&gt;&lt;p&gt;&lt;b&gt;Second Milestone: 1-2 Million Accounts&lt;/b&gt;&lt;/p&gt; &lt;p nd="42"&gt;As MySpace registration passed 1 million accounts and was closing in on 2 million, the service began knocking up against the input/output (I/O) capacity of the database servers—the speed at which they were capable of reading and writing data. This was still just a few months into the life of the service, in mid-2004. As MySpace user postings backed up, like a thousand groupies trying to squeeze into a nightclub with room for only a few hundred, the Web site began suffering from "major inconsistencies," Benedetto says, meaning that parts of the Web site were forever slightly out of date.&lt;/p&gt; &lt;ziffplacead&gt; &lt;/ziffplacead&gt;&lt;p nd="43"&gt;"A comment that someone had posted wouldn't show up for 5 minutes, so users were always complaining that the site was broken," he adds.&lt;/p&gt; &lt;p nd="44"&gt;The next database architecture was built around the concept of vertical partitioning, with separate databases for parts of the Web site that served different functions such as the log-in screen, user profiles and blogs. 
Again, the Web site's scalability problems seemed to have been solved—for a while.&lt;/p&gt; &lt;p nd="45"&gt;The vertical partitioning scheme helped divide up the workload for database reads and writes alike, and when users demanded a new feature, MySpace would put a new database online to support it. At 2 million accounts, MySpace also switched from using storage devices directly attached to its database servers to a storage area network (SAN), in which a pool of disk storage devices is tied together by a high-speed, specialized network, and the databases connect to the SAN. The change to a SAN boosted performance, uptime and reliability, Benedetto says.&lt;/p&gt;  &lt;p&gt;&lt;ziffarticle id="198614" page="6"&gt;&lt;b&gt;Next page: Third Milestone: 3 Million Accounts&lt;/b&gt;&lt;/ziffarticle&gt;  &lt;ziffpage title="Third Milestone: 3 Million Accounts"&gt;  &lt;img src="http://www.baselinemag.com/print_article2/images/bl_spacer.gif" width="500" height="1" /&gt; &lt;/ziffpage&gt;&lt;/p&gt;&lt;p&gt;&lt;b&gt;Third Milestone: 3 Million Accounts&lt;/b&gt;&lt;/p&gt; &lt;p nd="46"&gt;As the Web site's growth continued, hitting 3 million registered users, the vertical partitioning solution couldn't last. Even though the individual applications on sub-sections of the Web site were for the most part independent, there was also information they all had to share. In this architecture, every database had to have its own copy of the users table—the electronic roster of authorized MySpace users. That meant when a new user registered, a record for that account had to be created on nine different database servers. Occasionally, one of those transactions would fail, perhaps because one particular database server was momentarily unavailable, leaving the user with a partially created account where everything but, for example, the blog feature would work for that person.&lt;/p&gt; &lt;p nd="47"&gt;And there was another problem. 
Eventually, individual applications like blogs on sub-sections of the Web site would grow too large for a single database server.&lt;/p&gt; &lt;p nd="48"&gt;By mid-2004, MySpace had arrived at the point where it had to make what Web developers call the "scale up" versus "scale out" decision—whether to scale up to bigger, more powerful and more expensive servers, or spread out the database workload across lots of relatively cheap servers. In general, large Web sites tend to adopt a scale-out approach that allows them to keep adding capacity by adding more servers. &lt;/p&gt; &lt;p nd="49"&gt;But a successful scale-out architecture requires solving complicated distributed computing problems, and large Web site operators such as Google, Yahoo and Amazon.com have had to invent a lot of their own technology to make it work. For example, Google created its own distributed file system to handle distributed storage of the data it gathers and analyzes to index the Web.&lt;/p&gt; &lt;p nd="50"&gt;In addition, a scale-out strategy would require an extensive rewrite of the Web site software to make programs designed to run on a single server run across many—which, if it failed, could easily cost the developers their jobs, Benedetto says.&lt;/p&gt; &lt;ziffplacead&gt; &lt;/ziffplacead&gt;&lt;p nd="51"&gt;So, MySpace gave serious consideration to a scale-up strategy, spending a month and a half studying the option of upgrading to 32-processor servers that would be able to manage much larger databases, according to Benedetto. "At the time, this looked like it could be the panacea for all our problems," he says, wiping away scalability issues for what appeared then to be the long term. Best of all, it would require little or no change to the Web site software.&lt;/p&gt; &lt;p nd="52"&gt;Unfortunately, that high-end server hardware was just too expensive—many times the cost of buying the same processor power and memory spread across multiple servers, Benedetto says. 
Besides, the Web site's architects foresaw that even a super-sized database could ultimately be overloaded, he says: "In other words, if growth continued, we were going to have to scale out anyway."&lt;/p&gt; &lt;p nd="53"&gt;So, MySpace moved to a distributed computing architecture in which many physically separate computer servers were made to function like one logical computer. At the database level, this meant reversing the decision to segment the Web site into multiple applications supported by separate databases, and instead treat the whole Web site as one application. Now there would only be one user table in that database schema because the data to support blogs, profiles and other core features would be stored together.&lt;/p&gt; &lt;p nd="54"&gt;Now that all the core data was logically organized into one database, MySpace had to find another way to divide up the workload, which was still too much to be managed by a single database server running on commodity hardware. This time, instead of creating separate databases for Web site functions or applications, MySpace began splitting its user base into chunks of 1 million accounts and putting all the data keyed to those accounts in a separate instance of SQL Server. Today, MySpace actually runs two copies of SQL Server on each server computer, for a total of 2 million accounts per machine, but Benedetto notes that doing so leaves him the option of cutting the workload in half at any time with minimal disruption to the Web site architecture.&lt;/p&gt; &lt;p nd="55"&gt;There is still a single database that contains the user name and password credentials for all users. As members log in, the Web site directs them to the database server containing the rest of the data for their account. 
But even though it must support a massive user table, the load on the log-in database is more manageable because it is dedicated to that function alone.&lt;/p&gt;  &lt;p&gt;&lt;ziffarticle id="198614" page="7"&gt;&lt;b&gt;Next page: Fourth Milestone: 9 Million–17 Million Accounts&lt;/b&gt;&lt;/ziffarticle&gt;  &lt;ziffpage title="Fourth Milestone: 9 Million–17 Million Accounts"&gt;  &lt;img src="http://www.baselinemag.com/print_article2/images/bl_spacer.gif" width="500" height="1" /&gt; &lt;/ziffpage&gt;&lt;/p&gt;&lt;p&gt;&lt;b&gt;Fourth Milestone: 9 Million–17 Million Accounts&lt;/b&gt;&lt;/p&gt; &lt;p nd="56"&gt;When MySpace reached 9 million accounts, in early 2005, it began deploying new Web software written in Microsoft's C# programming language and running under ASP.NET. C# is the latest in a long line of derivatives of the C programming language, including C++ and Java, and was created to dovetail with the Microsoft .NET Framework, Microsoft's model architecture for software components and distributed computing. ASP.NET, which evolved from the earlier Active Server Pages technology for Web site scripting, is Microsoft's current Web site programming environment.&lt;/p&gt; &lt;p nd="57"&gt;Almost immediately, MySpace saw that the ASP.NET programs ran much more efficiently, consuming a smaller share of the processor power on each server to perform the same tasks as a comparable ColdFusion program. According to CTO Whitcomb, 150 servers running the new code were able to do the same work that had previously required 246. Benedetto says another reason for the performance improvement may have been that in the process of changing software platforms and rewriting code in a new language, Web site programmers reexamined every function for ways it could be streamlined.&lt;/p&gt; &lt;p nd="58"&gt;Eventually, MySpace began a wholesale migration to ASP.NET. 
The remaining ColdFusion code was adapted to run on ASP.NET rather than on a ColdFusion server, using BlueDragon.NET, a product from New Atlanta Communications of Alpharetta, Ga., that automatically recompiles ColdFusion code for the Microsoft environment.&lt;/p&gt; &lt;p nd="59"&gt;When MySpace hit 10 million accounts, it began to see storage bottlenecks again. Implementing a SAN had solved some early performance problems, but now the Web site's demands were starting to periodically overwhelm the SAN's I/O capacity—the speed with which it could read and write data to and from disk storage.&lt;/p&gt; &lt;p nd="60"&gt;Part of the problem was that the 1 million-accounts-per-database division of labor only smoothed out the workload when it was spread relatively evenly across all the databases on all the servers. That was usually the case, but not always. For example, the seventh 1 million-account database MySpace brought online wound up being filled in just seven days, largely because of the efforts of one Florida band that was particularly aggressive in urging fans to sign up. &lt;/p&gt; &lt;p nd="61"&gt;Whenever a particular database was hit with a disproportionate load, for whatever reason, the cluster of disk storage devices in the SAN dedicated to that database would be overloaded. "We would have disks that could handle significantly more I/O, only they were attached to the wrong database," Benedetto says.&lt;/p&gt; &lt;p nd="62"&gt;At first, MySpace addressed this issue by continually redistributing data across the SAN to reduce these imbalances, but it was a manual process "that became a full-time job for about two people," Benedetto says.&lt;/p&gt; &lt;ziffplacead&gt; &lt;/ziffplacead&gt;&lt;p nd="63"&gt;The longer-term solution was to move to a virtualized storage architecture where the entire SAN is treated as one big pool of storage capacity, without requiring that specific disks be dedicated to serving specific applications. 
MySpace now standardized on equipment from a relatively new SAN vendor, 3PARdata of Fremont, Calif., that offered a different approach to SAN architecture.&lt;/p&gt; &lt;p nd="64"&gt;In a 3PAR system, storage can still be logically partitioned into volumes of a given capacity, but rather than being assigned to a specific disk or disk cluster, volumes can be spread or "striped" across thousands of disks. This makes it possible to spread out the workload of reading and writing data more evenly. So, when a database needs to write a chunk of data, it will be recorded to whichever disks are available to do the work at that moment rather than being locked to a disk array that might be overloaded. And since multiple copies are recorded to different disks, data can also be retrieved without overloading any one component of the SAN.&lt;/p&gt; &lt;p nd="65"&gt;To further lighten the burden on its storage systems when it reached 17 million accounts, in the spring of 2005 MySpace added a caching tier—a layer of servers placed between the Web servers and the database servers whose sole job was to capture copies of frequently accessed data objects in memory and serve them to the Web application without the need for a database lookup. In other words, instead of querying the database 100 times when displaying a particular profile page to 100 Web site visitors, the site could query the database once and fulfill each subsequent request for that page from the cached data. Whenever a page changes, the cached data is erased from memory and a new database lookup must be performed—but until then, the database is spared that work, and the Web site performs better.&lt;/p&gt; &lt;p nd="66"&gt;The cache is also a better place to store transitory data that doesn't need to be recorded in a database, such as temporary files created to track a particular user's session on the Web site—a lesson that Benedetto admits he had to learn the hard way. 
"I'm a database and storage guy, so my answer tended to be, let's put everything in the database," he says, but putting inappropriate items such as session tracking data in the database only bogged down the Web site.&lt;/p&gt; &lt;p nd="67"&gt;The addition of the cache servers is "something we should have done from the beginning, but we were growing too fast and didn't have time to sit down and do it," Benedetto adds.&lt;/p&gt;   &lt;p&gt;&lt;ziffarticle id="198614" page="8"&gt;&lt;b&gt;Fifth Milestone: 26 Million Accounts&lt;/b&gt;&lt;/ziffarticle&gt; &lt;ziffpage title="Fifth Milestone: 26 Million Accounts"&gt;  &lt;img src="http://www.baselinemag.com/print_article2/images/bl_spacer.gif" width="500" height="1" /&gt; &lt;/ziffpage&gt;&lt;/p&gt;&lt;p&gt;&lt;b&gt;Fifth Milestone: 26 Million Accounts&lt;/b&gt;&lt;/p&gt; &lt;p nd="68"&gt;In mid-2005, when the service reached 26 million accounts, MySpace switched to SQL Server 2005 while the new edition of Microsoft's database software was still in beta testing. Why the hurry? The main reason was this was the first release of SQL Server to fully exploit the newer 64-bit processors, which among other things significantly expand the amount of memory that can be accessed at one time. "It wasn't the features, although the features are great," Benedetto says. "It was that we were so bottlenecked by memory."&lt;/p&gt; &lt;p nd="69"&gt;More memory translates into faster performance and higher capacity, which MySpace sorely needed. But as long as it was running a 32-bit version of SQL Server, each server could only take advantage of about 4 gigabytes of memory at a time. In the plumbing of a computer system, the difference between 64 bits and 32 bits is like widening the diameter of the pipe that allows information to flow in and out of memory. The effect is an exponential increase in memory access. 
With the upgrade to SQL Server 2005 and the 64-bit version of Windows Server 2003, MySpace could exploit 32 gigabytes of memory per server, and in 2006 it doubled its standard configuration to 64 gigabytes.&lt;/p&gt;  &lt;p&gt;&lt;ziffarticle id="198614" page="9"&gt;&lt;b&gt;Next page: Unexpected Errors&lt;/b&gt;&lt;/ziffarticle&gt;  &lt;ziffpage title="Unexpected Errors"&gt;  &lt;img src="http://www.baselinemag.com/print_article2/images/bl_spacer.gif" width="500" height="1" /&gt; &lt;/ziffpage&gt;&lt;/p&gt;&lt;p&gt;&lt;b&gt;Unexpected Errors&lt;/b&gt;&lt;/p&gt; &lt;p nd="70"&gt;If it were not for this series of upgrades and changes to systems architecture, the MySpace Web site wouldn't function at all. But what about the times when it still hiccups? What's behind those "Unexpected Error" screens that are the source of so many user complaints?&lt;/p&gt; &lt;p nd="71"&gt;One problem is that MySpace is pushing Microsoft's Web technologies into territory that only Microsoft itself has begun to explore, Benedetto says. As of November, MySpace was exceeding the number of simultaneous connections supported by SQL Server, causing the software to crash. The specific circumstances that trigger one of these crashes occur only about once every three days, but it's still frequent enough to be annoying, according to Benedetto. And anytime a database craps out, that's bad news if the data for the page you're trying to view is stored there. "Anytime that happens, and uncached data is unavailable through SQL Server, you'll see one of those unexpected errors," he explains.&lt;/p&gt; &lt;p nd="72"&gt;Last summer, MySpace's Windows 2003 servers shut down unexpectedly on multiple occasions. The culprit turned out to be a built-in feature of the operating system designed to prevent distributed denial of service attacks—a hacker tactic in which a Web site is subjected to so many connection requests from so many client computers that it crashes. 
MySpace is subject to those attacks just like many other top Web sites, but it defends against them at the network level rather than relying on this feature of Windows—which in this case was being triggered by hordes of legitimate connections from MySpace users.&lt;/p&gt; &lt;p nd="73"&gt;"We were scratching our heads for about a month trying to figure out why our Windows 2003 servers kept shutting themselves off," Benedetto says. Finally, with help from Microsoft, his team figured out how to tell the server to "ignore distributed denial of service; this is friendly fire."&lt;/p&gt; &lt;p nd="74"&gt;And then there was that Sunday night last July when a power outage in Los Angeles, where MySpace is headquartered, knocked the entire service offline for about 12 hours. The outage stood out partly because most other large Web sites use geographically distributed data centers to protect themselves against localized service disruptions. In fact, MySpace had two other data centers in operation at the time of this incident, but the Web servers housed there were still dependent on the SAN infrastructure in Los Angeles. Without that, they couldn't serve up anything more than a plea for patience.&lt;/p&gt; &lt;p nd="75"&gt;According to Benedetto, the main data center was designed to guarantee reliable service through connections to two different power grids, backed up by battery power and a generator with a 30-day supply of fuel. But in this case, both power grids failed, and in the process of switching to backup power, operators blew the main power circuit.&lt;/p&gt; &lt;p nd="76"&gt;MySpace is now working to replicate the SAN to two other backup sites by mid-2007. That will also help divvy up the Web site's workload, because in the normal course of business, each SAN location will be able to support one-third of the storage needs. 
But in an emergency, any one of the three sites would be able to sustain the Web site independently, Benedetto says.&lt;/p&gt; &lt;p nd="77"&gt;While MySpace still battles scalability problems, many users give it enough credit for what it does right that they are willing to forgive the occasional error page.&lt;/p&gt; &lt;p nd="78"&gt;"As a developer, I hate bugs, so sure it's irritating," says Dan Tanner, a 31-year-old software developer from Round Rock, Texas, who has used MySpace to reconnect with high school and college friends. "The thing is, it provides so much of a benefit to people that the errors and glitches we find are forgivable." If the site is down or malfunctioning one day, he simply comes back the next and picks up where he left off, Tanner says.&lt;/p&gt; &lt;p nd="79"&gt;That attitude is why most of the user forum responses to Drew's rant were telling him to calm down and that the problem would probably fix itself if he waited a few minutes. Not to be appeased, Drew wrote, "ive already emailed myspace twice, and its BS cause an hour ago it was working, now its not ... its complete BS." To which another user replied, "and it's free."&lt;/p&gt; &lt;p nd="80"&gt;Benedetto candidly admits that 100% reliability is not necessarily his top priority. "That's one of the benefits of not being a bank, of being a free service," he says.&lt;/p&gt; &lt;p nd="81"&gt;In other words, on MySpace the occasional glitch might mean the Web site loses track of someone's latest profile update, but it doesn't mean the site has lost track of that person's money. "That's one of the keys to the Web site's performance, knowing that we can accept some loss of data," Benedetto says. 
So, MySpace has configured SQL Server to extend the time between the "checkpoints" operations it uses to permanently record updates to disk storage—even at the risk of losing anywhere between 2 minutes and 2 hours of data—because this tweak makes the database run faster.&lt;/p&gt; &lt;p nd="82"&gt;Similarly, Benedetto's developers still often go through the whole process of idea, coding, testing and deployment in a matter of hours, he says. That raises the risk of introducing software bugs, but it allows them to introduce new features quickly. And because it's virtually impossible to do realistic load testing on this scale, the testing that they do perform is typically targeted at a subset of live users on the Web site who become unwitting guinea pigs for a new feature or tweak to the software, he explains.&lt;/p&gt; &lt;p nd="83"&gt;"We made a lot of mistakes," Benedetto says. "But in the end, I think we ended up doing more right than we did wrong." &lt;/p&gt;  &lt;p&gt;&lt;ziffarticle id="198614" page="10"&gt;&lt;b&gt;Next page: MySpace Base Case&lt;/b&gt;&lt;/ziffarticle&gt; &lt;ziffpage nd="84" title="MySpace Base Case"&gt;  &lt;img src="http://www.baselinemag.com/print_article2/images/bl_spacer.gif" width="500" height="1" /&gt;&lt;br /&gt;&lt;br /&gt;&lt;b&gt;MySpace Base Case&lt;/b&gt;&lt;br /&gt;&lt;b&gt;Headquarters: &lt;/b&gt;Fox Interactive Media (parent company), 407 N. 
Maple Drive, Beverly Hills, CA 90210&lt;br /&gt;&lt;b&gt;Phone:&lt;/b&gt; (310) 969-7200&lt;br /&gt;&lt;b&gt;Business:&lt;/b&gt; MySpace is a "next generation portal" built around a social networking Web site that allows members to meet, and stay connected with, other members, as well as their favorite bands and celebrities.&lt;br /&gt;&lt;b&gt;Chief Technology Officer:&lt;/b&gt; Aber Whitcomb&lt;br /&gt;&lt;b&gt;Financials in 2006:&lt;/b&gt; Estimated revenue of $200 million.&lt;br /&gt;&lt;br /&gt;&lt;b&gt;BASELINE GOALS:&lt;/b&gt;&lt;br /&gt;&lt;/ziffpage&gt;&lt;/p&gt;&lt;/li&gt;&lt;li nd="85"&gt; Double MySpace.com advertising rates, which in 2006 were typically a little more than 10 cents per 1,000 impressions. &lt;/li&gt;&lt;li nd="86"&gt; Generate revenue of at least $400 million from MySpace—out of $500 million expected from News Corp.'s Fox Interactive Media unit—in this fiscal year. &lt;/li&gt;&lt;li nd="87"&gt;Secure revenue of $900 million over the next three years from a search advertising deal with Google.&lt;br /&gt;&lt;br /&gt; &lt;/li&gt;&lt;/span&gt;&lt;!-- Intellitxt span close --&gt;               &lt;/div&gt;               &lt;div class="print_article"&gt;        &lt;div class="print_article_header"&gt;     &lt;span class="print_article_title"&gt;User Customization: Too Much of a Good Thing?&lt;/span&gt;&lt;br /&gt;                      &lt;span class="print_article_date"&gt;January 16, 2007&lt;/span&gt;&lt;br /&gt;         &lt;/div&gt;           &lt;div class="print_article_byline"&gt;              By       &lt;a href="http://www.baselinemag.com/author_bio/0,1541,a=4895,00.asp" class="print_article_byline"&gt;David F. 
Carr&lt;/a&gt;        &lt;/div&gt;         &lt;span name="intelliTxt" id="intelliTXT"&gt;&lt;p nd="88"&gt;One of the features members love about MySpace is that it gives people who open up an account a great deal of freedom to customize their pages with Cascading Style Sheets (CSS), a Web format that allows users to change the fonts, colors and background images associated with any element of the page.&lt;/p&gt; &lt;p nd="89"&gt;That feature was really "kind of a mistake," says Duc Chau, one of the social networking site's original developers. In other words, he neglected to write a routine that would strip Web coding tags from user postings—a standard feature on most Web sites that allow user contributions.&lt;/p&gt; &lt;p nd="90"&gt;The Web site's managers belatedly debated whether to continue allowing users to post code "because it was making the page load slow, making some pages look ugly, and exposing security holes," recalls Jason Feffer, former MySpace vice president of operations. "Ultimately we said, users come first, and this is what they want. We decided to allow the users to do what they wanted to do, and we would deal with the headaches."&lt;/p&gt; &lt;p&gt;In addition to CSS, JavaScript, a type of programming code that runs in the user's browser, was originally allowed. But MySpace eventually decided to filter it out because it was exploited to hack the accounts of members who visited a particular profile page. MySpace, however, still experiences periodic security problems, such as the infected QuickTime video that turned up in December, automatically replicating itself from profile page to profile page. QuickTime's creator, Apple Computer, responded with a software patch for MySpace to distribute. 
Similar problems have cropped up in the past with other Web software, such as the Flash viewer.&lt;/p&gt;  &lt;/span&gt;&lt;!-- Intellitxt span close --&gt;               &lt;/div&gt;               &lt;div class="print_article"&gt;        &lt;div class="print_article_header"&gt;     &lt;span class="print_article_title"&gt;Planner: Calculating the Costs of a Web Site Makeover&lt;/span&gt;&lt;br /&gt;                      &lt;span class="print_article_date"&gt;January 7, 2007&lt;/span&gt;&lt;br /&gt;         &lt;/div&gt;           &lt;div class="print_article_byline"&gt;            &lt;/div&gt;         &lt;span name="intelliTxt" id="intelliTXT"&gt;&lt;p&gt;&lt;b&gt;PROJECT OVERVIEW&lt;/b&gt;&lt;/p&gt; &lt;p&gt;At your consumer products company, "Web 2.0" is officially in danger of becoming this year's "thinking outside of the box": buzzword fodder that's big on elocution, but short on execution. &lt;/p&gt; &lt;p&gt;That's why this six-month project to enliven and build out your consumer Web site will place so much emphasis on defining exactly what your company wants to accomplish with "Web 2.0"—a catchall that means different things to different businesses. For your company, it will mean adding more dimension to a flat consumer Web site and, most important, developing more direct connections to a customer base that gets harder to reach each day.&lt;/p&gt; &lt;p&gt;Part of your motivation here will be pure survival. Old-world media and marketing approaches are increasingly less effective in the face of rapidly fragmenting customer niches. But far from a business problem, those niches represent "an opportunity for customer engagement," says Patricia Seybold, founder and CEO of the Patricia Seybold Group and author of Outside Innovation: How Your Customers Will Co-Design Your Company's Future. 
"Instead of combating fragmentation, companies should be leveraging online tools and online communities to leverage customer fragmentation and to address more customers' needs, not fewer."&lt;/p&gt; &lt;p&gt;Delivering that online promise will mean actively and constructively involving your customers in your business processes through those online tools—blogs, surveys, contests, forums, ratings, and other one-to-one communication exchanges that will do more than just create a multilayered online presence. They'll also use a data analytics backbone to channel that customer interaction into tangible feedback on everything from smarter product development to more effective advertising to "viral" (buzzwords never die) word-of-mouth sales. &lt;/p&gt;  &lt;/span&gt;&lt;!-- Intellitxt span close --&gt;               &lt;/div&gt;               &lt;div class="print_article"&gt;        &lt;div class="print_article_header"&gt;     &lt;span class="print_article_title"&gt;Web Design Experts Grade MySpace&lt;/span&gt;&lt;br /&gt;                      &lt;span class="print_article_date"&gt;January 15, 2007&lt;/span&gt;&lt;br /&gt;         &lt;/div&gt;           &lt;div class="print_article_byline"&gt;              By       &lt;a href="http://www.baselinemag.com/author_bio/0,1541,a=4895,00.asp" class="print_article_byline"&gt;David F. Carr&lt;/a&gt;        &lt;/div&gt;         &lt;span name="intelliTxt" id="intelliTXT"&gt;&lt;p&gt;MySpace.com's continued growth flies in the face of much of what Web experts have told us for years about how to succeed on the Internet. It's buggy, often responding to basic user requests with the dreaded "Unexpected Error" screen, and stocked with thousands of pages that violate all sorts of conventional Web design standards with their wild colors and confusing background images. 
And yet, it succeeds anyway.&lt;/p&gt; &lt;p&gt;Why?&lt;/p&gt; &lt;p&gt;"The hurdle is a little bit lower for something like this specifically because it's not a mission-critical site," says Jakob Nielsen, the famed Web usability expert and principal of the Nielsen Norman Group, which has its headquarters in Fremont, Calif. "If someone were trying to launch an eBay competitor and it had problems like this, it would never get off the ground." For that reason, he finds it difficult to judge MySpace by the same standards as more utilitarian Web sites, such as a shopping site where usability flaws might lead to abandoned shopping carts.&lt;/p&gt; &lt;p&gt;On most Web sites designed to sell or inform, the rampant self-expression Nielsen sees on MySpace would be a fatal flaw. "Usually, people don't go to a Web site to see how to express yourself," he says. "But people do go to MySpace to see how you express yourself, to see what bands you like, all that kind of stuff."&lt;/p&gt; &lt;p&gt;The reliability of the service also winds up being judged by different standards, according to Nielsen. If a Web user finds an e-commerce site is down, switching to a competitor's Web site is an easy decision. "But in this case, because your friends are here, you're more likely to want to come back to this site rather than go to another site," Nielsen says. "Most other Web sites could not afford that."&lt;/p&gt; &lt;p&gt;From a different angle, Newsvine CEO Mike Davidson says one of the things MySpace has done a great job of is allowing millions of members to sort themselves into smaller communities of a more manageable size, based on school and interest group associations. Davidson has studied MySpace to glean ideas for social networking features he is adding to his own Web site for news junkies. 
As a Web developer and former manager of media product development for the ESPN.com sports news Web site, he admires the way MySpace has built a loyal community of members.&lt;/p&gt; &lt;p&gt;"One of the things MySpace has been really great about is turning crap into treasure," Davidson says. "You look at these profile pages, and most of the comments are stuff like, 'Love your hair,' so to an outsider, it's kind of stupid. But to that person, that's their life." The "treasure" MySpace extracts from this experience is the billions of page views recorded as users click from profile to profile, socializing and gossiping online.&lt;/p&gt; &lt;p&gt;On the other hand, parts of the MySpace Web application are so inefficient, requiring multiple clicks and page views to perform simple tasks, that a good redesign would probably eliminate two-thirds of those page views, Davidson says. Even if that hurt MySpace's bragging rights as one of the most visited Web sites, it would ultimately lead to more satisfied users and improve ad rates by making each page view count for more, he argues.&lt;/p&gt; &lt;p&gt;"In a lot of ways, he's very right," says Jason Feffer, a former MySpace vice president of operations. While denying that the Web site was intentionally designed to inflate the number of page views, he says it's true that MySpace winds up with such a high inventory of page views that there is never enough advertising to sell against it. "On the other hand, when you look at the result, it's hard to argue that what we did with the interface and navigation was bad," he maintains. "And why change it, when you have success?"&lt;/p&gt; &lt;p&gt;Feffer, who is currently working on his own startup of an undisclosed nature called SodaHead.com, says one of the biggest reasons MySpace succeeded was that its users were always willing to cut it some slack.&lt;/p&gt; &lt;p&gt;"Creating a culture where users are sympathetic is very important," Feffer says. 
Especially in the beginning, many users thought the Web site was "something Tom was running out of his garage," he says, referring to MySpace president and co-founder Tom Anderson, who is the public face of the service by virtue of being the first online "friend" who welcomes every new MySpace user.&lt;/p&gt; &lt;p&gt;That startup aura made users more tolerant of occasional bugs and outages, according to Feffer. "They would think that it was cool that during an outage, you're putting up Pac-Man for me to play with," he says. "If you're pretending to be Yahoo or Google, you're not going to get much sympathy."&lt;/p&gt; &lt;p&gt;MySpace is starting to be held to a higher standard, however, since being purchased by News Corp. in 2005, and the reaction was different following a 12-hour outage this past summer, Feffer says: "I don't think anyone believed it was Tom's little garage project anymore."&lt;/p&gt;  &lt;/span&gt;&lt;!-- Intellitxt span close --&gt;               &lt;/div&gt;               &lt;div class="print_article"&gt;        &lt;div class="print_article_header"&gt;     &lt;span class="print_article_title"&gt;MySpace Insiders&lt;/span&gt;&lt;br /&gt;                      &lt;span class="print_article_date"&gt;January 16, 2007&lt;/span&gt;&lt;br /&gt;         &lt;/div&gt;           &lt;div class="print_article_byline"&gt;              By       &lt;a href="http://www.baselinemag.com/author_bio/0,1541,a=4895,00.asp" class="print_article_byline"&gt;David F. 
Carr&lt;/a&gt;        &lt;/div&gt;         &lt;span name="intelliTxt" id="intelliTXT"&gt;&lt;b&gt;Rupert Murdoch&lt;br /&gt;&lt;/b&gt; Chairman, News Corp.&lt;br /&gt;As the creator of a media empire that includes 20th Century Fox, the Fox television stations, the New York Post and many other news, broadcast and music properties, Murdoch championed the purchase of MySpace.com as a way of significantly expanding Fox Interactive Media's presence on the Web.&lt;br /&gt;&lt;br /&gt;&lt;b&gt;Chris DeWolfe&lt;br /&gt;&lt;/b&gt; CEO, MySpace&lt;br /&gt;DeWolfe, who is also a co-founder of MySpace.com, led its creation while employed by Intermix Media and continues to manage it today as a unit of News Corp.'s Fox Interactive Media. Previously, he was CEO of the e-mail marketing firm ResponseBase, which Intermix bought in 2002.&lt;br /&gt;&lt;br /&gt;&lt;b&gt;Tom Anderson&lt;br /&gt;&lt;/b&gt; President, MySpace&lt;br /&gt;A co-founder of MySpace, Anderson is best known as "Tom," the first person who appears on the "friends list" of new MySpace.com members and who acts as the public face of the Web site's support organization. He and DeWolfe met at Xdrive, the Web file storage company where both worked prior to starting ResponseBase.&lt;br /&gt;&lt;br /&gt;&lt;b&gt;Aber Whitcomb&lt;br /&gt;&lt;/b&gt; Chief Technology Officer, MySpace&lt;br /&gt;Whitcomb is a co-founder of MySpace, where he is responsible for engineering and technical operations. He speaks frequently on the issues of large-scale computing, networking and storage.&lt;br /&gt;&lt;br /&gt;&lt;b&gt;Jim Benedetto&lt;br /&gt;&lt;/b&gt; Vice President of Technology, MySpace&lt;br /&gt;Benedetto joined MySpace about a month after it launched, in late 2003. On his own MySpace profile page, he describes himself as a 27-year-old 2001 graduate of the University of Southern California whose trip to Australia last year included diving in a shark tank. 
Just out of school in 2001, he joined Quack.com, a voice portal startup that was acquired by America Online. Today, Benedetto says he is "working triple overtime to take MySpace international."&lt;br /&gt;&lt;br /&gt;&lt;b&gt;Jason Feffer&lt;br /&gt;&lt;/b&gt; Former vice president of operations, MySpace&lt;br /&gt;Starting with MySpace's launch in late 2003, Feffer was responsible for MySpace's advertising and support operations. He also worked with DoubleClick, the Web site advertising vendor, to ensure that its software met MySpace's scalability requirements and visitor targeting goals. Since leaving MySpace last summer, he has been working on a startup called SodaHead.com, which promises to offer a new twist on social networking when it launches later this year.&lt;br /&gt;&lt;br /&gt;&lt;b&gt;Duc Chau&lt;br /&gt;&lt;/b&gt; Founder and CEO, Flukiest&lt;br /&gt;Chau, as an employee of Intermix, led the creation of a pilot version of the MySpace Web site, which employed Perl and a MySQL database, but left Intermix shortly after the production Web site went live. He went on to work for StrongMail, a vendor of e-mail management appliances. Chau now runs Flukiest, a social networking and file-sharing Web site that is also selling its software for use within other Web sites.&lt;br /&gt;&lt;br /&gt;&lt;/span&gt;&lt;!-- Intellitxt span close --&gt;               &lt;/div&gt;                       &lt;div class="print_article_header"&gt;     &lt;span class="print_article_title"&gt;MySpace Tech Roster&lt;/span&gt;&lt;br /&gt;                      &lt;span class="print_article_date"&gt;January 16, 2007&lt;/span&gt;&lt;br /&gt;         &lt;/div&gt;           &lt;div class="print_article_byline"&gt;              By       &lt;a href="http://www.baselinemag.com/author_bio/0,1541,a=4895,00.asp" class="print_article_byline"&gt;David F. 
Carr&lt;/a&gt;        &lt;/div&gt;         &lt;span name="intelliTxt" id="intelliTXT"&gt;MySpace has managed to scale its Web site infrastructure to meet booming demand by using a mix of time-proven and leading-edge information technologies. &lt;table width="100%" border="0" cellpadding="0" cellspacing="0"&gt;&lt;tbody&gt;&lt;tr&gt;&lt;td bgcolor="#333333"&gt; &lt;table width="100%" border="0" cellpadding="4" cellspacing="1"&gt;  &lt;tbody&gt;&lt;tr bgcolor="#f7f7e9"&gt; &lt;td class="bodycopymedium" valign="top"&gt;&lt;b&gt;APPLICATION&lt;/b&gt;&lt;/td&gt; &lt;td class="bodycopymedium" valign="top"&gt;&lt;b&gt;PRODUCT&lt;/b&gt;&lt;/td&gt; &lt;td class="bodycopymedium" valign="top"&gt;&lt;b&gt;SUPPLIER&lt;/b&gt;&lt;/td&gt;&lt;/tr&gt;  &lt;tr bgcolor="#e0e0e0"&gt; &lt;td class="bodycopymedium" valign="top"&gt;Web application technology &lt;/td&gt; &lt;td class="bodycopymedium" valign="top"&gt; Microsoft Internet Information Services, .NET Framework&lt;/td&gt; &lt;td class="bodycopymedium" valign="top"&gt;Microsoft&lt;/td&gt;&lt;/tr&gt;   &lt;tr bgcolor="#f7f7e9"&gt; &lt;td class="bodycopymedium" valign="top"&gt;Server operating system &lt;/td&gt; &lt;td class="bodycopymedium" valign="top"&gt; Windows 2003&lt;/td&gt; &lt;td class="bodycopymedium" valign="top"&gt;Microsoft&lt;/td&gt;&lt;/tr&gt;  &lt;tr bgcolor="#e0e0e0"&gt; &lt;td class="bodycopymedium" valign="top"&gt;Programming language and environment&lt;/td&gt; &lt;td class="bodycopymedium" valign="top"&gt;Applications written in C# for ASP.NET&lt;/td&gt; &lt;td class="bodycopymedium" valign="top"&gt;Microsoft&lt;/td&gt;&lt;/tr&gt;  &lt;tr bgcolor="#f7f7e9"&gt; &lt;td class="bodycopymedium" valign="top"&gt;Programming language and environment&lt;/td&gt; &lt;td class="bodycopymedium" valign="top"&gt;Site originally launched on Adobe's ColdFusion; remaining ColdFusion code runs under New Atlanta's BlueDragon.NET product.&lt;/td&gt; &lt;td class="bodycopymedium" valign="top"&gt;Adobe, New Atlanta 
&lt;/td&gt;&lt;/tr&gt;  &lt;tr bgcolor="#e0e0e0"&gt; &lt;td class="bodycopymedium" valign="top"&gt; Database &lt;/td&gt; &lt;td class="bodycopymedium" valign="top"&gt;SQL Server 2005 &lt;/td&gt; &lt;td class="bodycopymedium" valign="top"&gt;Microsoft&lt;/td&gt;&lt;/tr&gt;  &lt;tr bgcolor="#f7f7e9"&gt; &lt;td class="bodycopymedium" valign="top"&gt;Storage area network&lt;/td&gt; &lt;td class="bodycopymedium" valign="top"&gt; 3PAR Utility Storage&lt;/td&gt; &lt;td class="bodycopymedium" valign="top"&gt;3PARdata&lt;/td&gt;&lt;/tr&gt;  &lt;tr bgcolor="#e0e0e0"&gt; &lt;td class="bodycopymedium" valign="top"&gt;Internet application acceleration&lt;/td&gt; &lt;td class="bodycopymedium" valign="top"&gt;NetScaler&lt;/td&gt; &lt;td class="bodycopymedium" valign="top"&gt;Citrix Systems&lt;/td&gt;&lt;/tr&gt;  &lt;tr bgcolor="#f7f7e9"&gt; &lt;td class="bodycopymedium" valign="top"&gt;Server hardware&lt;/td&gt; &lt;td class="bodycopymedium" valign="top"&gt; Standardized on HP 585 (see below)&lt;/td&gt; &lt;td class="bodycopymedium" valign="top"&gt;Hewlett-Packard&lt;/td&gt;&lt;/tr&gt;  &lt;tr bgcolor="#e0e0e0"&gt; &lt;td class="bodycopymedium" valign="top"&gt;Ad server software    &lt;/td&gt; &lt;td class="bodycopymedium" valign="top"&gt;DART Enterprise&lt;/td&gt; &lt;td class="bodycopymedium" valign="top"&gt;DoubleClick&lt;/td&gt;&lt;/tr&gt;  &lt;tr bgcolor="#f7f7e9"&gt; &lt;td class="bodycopymedium" valign="top"&gt;Search and keyword advertising&lt;/td&gt; &lt;td class="bodycopymedium" valign="top"&gt; Google search&lt;/td&gt; &lt;td class="bodycopymedium" valign="top"&gt;Google&lt;/td&gt;&lt;/tr&gt; &lt;/tbody&gt;&lt;/table&gt;&lt;/td&gt;&lt;/tr&gt;&lt;/tbody&gt;&lt;/table&gt;  &lt;span style="font-size:-1;"&gt;Standard database server configuration consists of Hewlett-Packard HP 585 servers with 4 AMD Opteron dual-core, 64-bit processors with 64 gigabytes of memory (recently upgraded from 32). 
The operating system is Windows 2003, Service Pack 1; the database software is Microsoft SQL Server 2005, Service Pack 1. There's a 10-gigabit-per-second Ethernet network card, plus two host bus adapters for storage area network communications. The infrastructure for the core user profiles application includes 65 of these database servers with a total capacity of more than 2 terabytes of memory, 520 processors and 130 gigabytes of network throughput. &lt;i&gt;Source: MySpace.com user conference presentations&lt;/i&gt;&lt;/span&gt;&lt;/span&gt;&lt;div class="blogger-post-footer"&gt;&lt;img width='1' height='1' src='https://blogger.googleusercontent.com/tracker/33306823-8508852025527853905?l=pylab.blogspot.com' alt='' /&gt;&lt;/div&gt;</content><link rel='replies' type='application/atom+xml' href='http://pylab.blogspot.com/feeds/8508852025527853905/comments/default' title='Post Comments'/><link rel='replies' type='text/html' href='http://www.blogger.com/comment.g?blogID=33306823&amp;postID=8508852025527853905' title='0 Comments'/><link rel='edit' type='application/atom+xml' href='http://www.blogger.com/feeds/33306823/posts/default/8508852025527853905'/><link rel='self' type='application/atom+xml' href='http://www.blogger.com/feeds/33306823/posts/default/8508852025527853905'/><link rel='alternate' type='text/html' href='http://pylab.blogspot.com/2007/01/inside-myspacecom.html' title='Inside MySpace.com'/><author><name>NextGenSearch</name><email>noreply@blogger.com</email><gd:image rel='http://schemas.google.com/g/2005#thumbnail' width='16' height='16' src='http://img2.blogblog.com/img/b16-rounded.gif'/></author><thr:total>0</thr:total></entry><entry><id>tag:blogger.com,1999:blog-33306823.post-8260713527040732384</id><published>2007-01-16T13:28:00.000-08:00</published><updated>2007-01-16T13:32:47.435-08:00</updated><category scheme='http://www.blogger.com/atom/ns#' term='postgres'/><title type='text'>select * from pg_stat_activity order by 
backend_start;</title><content type='html'>select backend_start, client_addr from pg_stat_activity order by backend_start; &lt;br /&gt;&lt;br /&gt;select * from pg_stat_activity order by backend_start;&lt;div class="blogger-post-footer"&gt;&lt;img width='1' height='1' src='https://blogger.googleusercontent.com/tracker/33306823-8260713527040732384?l=pylab.blogspot.com' alt='' /&gt;&lt;/div&gt;</content><link rel='replies' type='application/atom+xml' href='http://pylab.blogspot.com/feeds/8260713527040732384/comments/default' title='Post Comments'/><link rel='replies' type='text/html' href='http://www.blogger.com/comment.g?blogID=33306823&amp;postID=8260713527040732384' title='0 Comments'/><link rel='edit' type='application/atom+xml' href='http://www.blogger.com/feeds/33306823/posts/default/8260713527040732384'/><link rel='self' type='application/atom+xml' href='http://www.blogger.com/feeds/33306823/posts/default/8260713527040732384'/><link rel='alternate' type='text/html' href='http://pylab.blogspot.com/2007/01/select-from-pgstatactivity-order-by.html' title='select * from pg_stat_activity order by backend_start;'/><author><name>NextGenSearch</name><email>noreply@blogger.com</email><gd:image rel='http://schemas.google.com/g/2005#thumbnail' width='16' height='16' src='http://img2.blogblog.com/img/b16-rounded.gif'/></author><thr:total>0</thr:total></entry><entry><id>tag:blogger.com,1999:blog-33306823.post-581559056027402904</id><published>2007-01-16T01:33:00.001-08:00</published><updated>2007-01-16T01:35:08.291-08:00</updated><category scheme='http://www.blogger.com/atom/ns#' term='postgres'/><title type='text'>Debugging postgresql functions</title><content type='html'>in psql, i do the&lt;br /&gt;set client_min_messages TO debug;&lt;br /&gt;&lt;br /&gt;and then in the function you can do&lt;br /&gt;raise debug 'foo message: %', _a_value;&lt;br /&gt;where % are replaced by the _a_value, variable, etc?&lt;br /&gt;&lt;br /&gt;the raise debug is only shown if 
client_min_messages is debug&lt;br /&gt;so you can leave them in there after, and turn on debug mode later&lt;br /&gt;then&lt;br /&gt;when you create&lt;br /&gt;the function&lt;br /&gt;to test i sometimes do&lt;br /&gt;begin transaction;&lt;br /&gt;select function();&lt;br /&gt;and then when it fails, or after i poke at results,&lt;br /&gt;rollback;&lt;br /&gt;well, handy for some kinds of testing&lt;div class="blogger-post-footer"&gt;&lt;img width='1' height='1' src='https://blogger.googleusercontent.com/tracker/33306823-581559056027402904?l=pylab.blogspot.com' alt='' /&gt;&lt;/div&gt;</content><link rel='replies' type='application/atom+xml' href='http://pylab.blogspot.com/feeds/581559056027402904/comments/default' title='Post Comments'/><link rel='replies' type='text/html' href='http://www.blogger.com/comment.g?blogID=33306823&amp;postID=581559056027402904' title='0 Comments'/><link rel='edit' type='application/atom+xml' href='http://www.blogger.com/feeds/33306823/posts/default/581559056027402904'/><link rel='self' type='application/atom+xml' href='http://www.blogger.com/feeds/33306823/posts/default/581559056027402904'/><link rel='alternate' type='text/html' href='http://pylab.blogspot.com/2007/01/debugging-postgresql-functions.html' title='Debugging postgresql functions'/><author><name>NextGenSearch</name><email>noreply@blogger.com</email><gd:image rel='http://schemas.google.com/g/2005#thumbnail' width='16' height='16' src='http://img2.blogblog.com/img/b16-rounded.gif'/></author><thr:total>0</thr:total></entry><entry><id>tag:blogger.com,1999:blog-33306823.post-2474864055165408321</id><published>2007-01-16T01:32:00.001-08:00</published><updated>2007-01-16T01:32:49.632-08:00</updated><category scheme='http://www.blogger.com/atom/ns#' term='postgres'/><title type='text'>Raw postgresql function</title><content type='html'>i would start off with&lt;br /&gt;create function do_vore (&lt;br /&gt;  pixpair_id  integer,&lt;br /&gt;  picture_id  integer&lt;br /&gt;) 
returns integer as $$&lt;br /&gt;DECLARE&lt;br /&gt;  _id    integer;&lt;br /&gt;BEGIN&lt;br /&gt;  _id := 0;&lt;br /&gt;&lt;br /&gt;  -- insert into pixpair_ips&lt;br /&gt;  -- increment count in pixpair&lt;br /&gt;  -- update picture set totals there too   &lt;br /&gt;  return _id;&lt;br /&gt;END;&lt;br /&gt;$$ language plpgsql;?1&lt;div class="blogger-post-footer"&gt;&lt;img width='1' height='1' src='https://blogger.googleusercontent.com/tracker/33306823-2474864055165408321?l=pylab.blogspot.com' alt='' /&gt;&lt;/div&gt;</content><link rel='replies' type='application/atom+xml' href='http://pylab.blogspot.com/feeds/2474864055165408321/comments/default' title='Post Comments'/><link rel='replies' type='text/html' href='http://www.blogger.com/comment.g?blogID=33306823&amp;postID=2474864055165408321' title='0 Comments'/><link rel='edit' type='application/atom+xml' href='http://www.blogger.com/feeds/33306823/posts/default/2474864055165408321'/><link rel='self' type='application/atom+xml' href='http://www.blogger.com/feeds/33306823/posts/default/2474864055165408321'/><link rel='alternate' type='text/html' href='http://pylab.blogspot.com/2007/01/raw-postgresql-function.html' title='Raw postgresql function'/><author><name>NextGenSearch</name><email>noreply@blogger.com</email><gd:image rel='http://schemas.google.com/g/2005#thumbnail' width='16' height='16' src='http://img2.blogblog.com/img/b16-rounded.gif'/></author><thr:total>0</thr:total></entry><entry><id>tag:blogger.com,1999:blog-33306823.post-7724894161097299605</id><published>2007-01-16T01:20:00.000-08:00</published><updated>2007-01-16T01:21:08.202-08:00</updated><category scheme='http://www.blogger.com/atom/ns#' term='webserver'/><title type='text'>Reboot server detecting open connections</title><content type='html'>#!/bin/bash&lt;br /&gt;cd /home/mark/work/pop&lt;br /&gt;svn up&lt;br /&gt;echo 'updated to latest code. 
trying restart'&lt;br /&gt;&lt;br /&gt;#find number of active connections that are established&lt;br /&gt;number_of_conn=`netstat -apn 2&gt;&amp;1 |  grep :80 | grep -i established | wc -l`&lt;br /&gt;#echo $number_of_conn&lt;br /&gt;&lt;br /&gt;trial=0&lt;br /&gt;while [ $number_of_conn -ne 0 ]&lt;br /&gt;do&lt;br /&gt;trial=$((trial +1 ))&lt;br /&gt;echo "$number_of_conn connections: trying again"&lt;br /&gt;number_of_conn=`netstat -apn 2&gt;&amp;1 |  grep :80 | grep -i established | wc -l`&lt;br /&gt;done&lt;br /&gt;&lt;br /&gt;/home/mark/bin/startstop&lt;br /&gt;echo 'there were 0 connections. restarted'&lt;br /&gt;echo "number of trials: $trial"&lt;div class="blogger-post-footer"&gt;&lt;img width='1' height='1' src='https://blogger.googleusercontent.com/tracker/33306823-7724894161097299605?l=pylab.blogspot.com' alt='' /&gt;&lt;/div&gt;</content><link rel='replies' type='application/atom+xml' href='http://pylab.blogspot.com/feeds/7724894161097299605/comments/default' title='Post Comments'/><link rel='replies' type='text/html' href='http://www.blogger.com/comment.g?blogID=33306823&amp;postID=7724894161097299605' title='0 Comments'/><link rel='edit' type='application/atom+xml' href='http://www.blogger.com/feeds/33306823/posts/default/7724894161097299605'/><link rel='self' type='application/atom+xml' href='http://www.blogger.com/feeds/33306823/posts/default/7724894161097299605'/><link rel='alternate' type='text/html' href='http://pylab.blogspot.com/2007/01/reboot-server-detecting-open.html' title='Reboot server detecting open connections'/><author><name>NextGenSearch</name><email>noreply@blogger.com</email><gd:image rel='http://schemas.google.com/g/2005#thumbnail' width='16' height='16' src='http://img2.blogblog.com/img/b16-rounded.gif'/></author><thr:total>0</thr:total></entry><entry><id>tag:blogger.com,1999:blog-33306823.post-3513781847780303014</id><published>2007-01-16T00:55:00.001-08:00</published><updated>2007-01-16T00:55:45.329-08:00</updated><category 
scheme='http://www.blogger.com/atom/ns#' term='postgres'/><title type='text'>Remove dup pixpair</title><content type='html'>remove_dup_pixpair&lt;br /&gt;select pic_id, sum(pic_votes) as votes from&lt;br /&gt;&lt;br /&gt;(&lt;br /&gt;select  pic1_id as pic_id,&lt;br /&gt;        pic1_votes as pic_votes&lt;br /&gt;from&lt;br /&gt;    pixpair p1&lt;br /&gt;&lt;br /&gt;union&lt;br /&gt;&lt;br /&gt;select  pic2_id as pic_id,&lt;br /&gt;        pic2_votes as pic_votes&lt;br /&gt;from&lt;br /&gt;    pixpair p2&lt;br /&gt;&lt;br /&gt;) a&lt;br /&gt;group by pic_id&lt;br /&gt;12:45:29 am&lt;br /&gt; &lt;br /&gt;sum of all pictures.total_votes should equal pixpair.total_votes&lt;br /&gt;12:45:33 am&lt;br /&gt;Travis &lt;br /&gt;and then, i sum this.&lt;br /&gt;select sum(votes) from&lt;br /&gt;(&lt;br /&gt;&lt;br /&gt;select pic_id, sum(pic_votes) as votes from&lt;br /&gt;&lt;br /&gt;(&lt;br /&gt;select  pic1_id as pic_id,&lt;br /&gt;        pic1_votes as pic_votes&lt;br /&gt;from&lt;br /&gt;    pixpair p1&lt;br /&gt;&lt;br /&gt;union&lt;br /&gt;&lt;br /&gt;select  pic2_id as pic_id,&lt;br /&gt;        pic2_votes as pic_votes&lt;br /&gt;from&lt;br /&gt;    pixpair p2&lt;br /&gt;&lt;br /&gt;) a&lt;br /&gt;group by pic_id&lt;br /&gt;&lt;br /&gt;)b&lt;br /&gt;sum&lt;br /&gt;-------&lt;br /&gt; 11622&lt;br /&gt;and that gives a different number still.&lt;div class="blogger-post-footer"&gt;&lt;img width='1' height='1' src='https://blogger.googleusercontent.com/tracker/33306823-3513781847780303014?l=pylab.blogspot.com' alt='' /&gt;&lt;/div&gt;</content><link rel='replies' type='application/atom+xml' href='http://pylab.blogspot.com/feeds/3513781847780303014/comments/default' title='Post Comments'/><link rel='replies' type='text/html' href='http://www.blogger.com/comment.g?blogID=33306823&amp;postID=3513781847780303014' title='0 Comments'/><link rel='edit' type='application/atom+xml' href='http://www.blogger.com/feeds/33306823/posts/default/3513781847780303014'/><link rel='self' 
type='application/atom+xml' href='http://www.blogger.com/feeds/33306823/posts/default/3513781847780303014'/><link rel='alternate' type='text/html' href='http://pylab.blogspot.com/2007/01/remove-dup-pixpair.html' title='Remove dup pixpair'/><author><name>NextGenSearch</name><email>noreply@blogger.com</email><gd:image rel='http://schemas.google.com/g/2005#thumbnail' width='16' height='16' src='http://img2.blogblog.com/img/b16-rounded.gif'/></author><thr:total>0</thr:total></entry><entry><id>tag:blogger.com,1999:blog-33306823.post-8846545922572930001</id><published>2007-01-15T23:37:00.000-08:00</published><updated>2007-01-15T23:38:56.984-08:00</updated><category scheme='http://www.blogger.com/atom/ns#' term='postgres'/><title type='text'>Transactions and deletes in pgsql</title><content type='html'>&lt;span style="font-weight: bold;"&gt;begin transaction; &lt;/span&gt;&lt;br /&gt;&lt;span style="font-weight: bold;"&gt;delete from pixpair_ips where pixpair_id in ( &lt;/span&gt;&lt;br /&gt;&lt;span style="font-weight: bold;"&gt;select id from pixpair where pic1_id = 518 or pic2_id=518); &lt;/span&gt;&lt;span style="font-weight: bold;"&gt;&lt;/span&gt;&lt;br /&gt;&lt;span style="font-weight: bold;"&gt;delete from pixpair where pic1_id = 518 or pic2_id=518; &lt;/span&gt;&lt;br /&gt; DELETE 41&lt;br /&gt;&lt;span style="font-weight: bold;"&gt;select update_pixpair(518); &lt;/span&gt;&lt;br /&gt;&lt;span style="font-weight: bold;"&gt; &lt;/span&gt;update_pixpair&lt;br /&gt;----------------&lt;br /&gt;            43&lt;br /&gt;&lt;span style="font-weight: bold;"&gt;commit transaction; &lt;/span&gt;&lt;br /&gt;COMMIT&lt;div class="blogger-post-footer"&gt;&lt;img width='1' height='1' src='https://blogger.googleusercontent.com/tracker/33306823-8846545922572930001?l=pylab.blogspot.com' alt='' /&gt;&lt;/div&gt;</content><link rel='replies' type='application/atom+xml' href='http://pylab.blogspot.com/feeds/8846545922572930001/comments/default' title='Post Comments'/><link 
rel='replies' type='text/html' href='http://www.blogger.com/comment.g?blogID=33306823&amp;postID=8846545922572930001' title='0 Comments'/><link rel='edit' type='application/atom+xml' href='http://www.blogger.com/feeds/33306823/posts/default/8846545922572930001'/><link rel='self' type='application/atom+xml' href='http://www.blogger.com/feeds/33306823/posts/default/8846545922572930001'/><link rel='alternate' type='text/html' href='http://pylab.blogspot.com/2007/01/transactions-and-deletes-in-pgsql.html' title='Transactions and deletes in pgsql'/><author><name>NextGenSearch</name><email>noreply@blogger.com</email><gd:image rel='http://schemas.google.com/g/2005#thumbnail' width='16' height='16' src='http://img2.blogblog.com/img/b16-rounded.gif'/></author><thr:total>0</thr:total></entry><entry><id>tag:blogger.com,1999:blog-33306823.post-8813273510289181479</id><published>2007-01-15T23:33:00.000-08:00</published><updated>2007-01-15T23:37:45.143-08:00</updated><category scheme='http://www.blogger.com/atom/ns#' term='postgres'/><title type='text'>Password securing postgres</title><content type='html'># "local" is for Unix domain socket connections only&lt;br /&gt;local   all         all                               trust&lt;br /&gt;# IPv4 local connections:&lt;br /&gt;host    all         all         127.0.0.1/32          trust&lt;br /&gt;host    all         all         0.0.0.0             &lt;span style="font-weight: bold;"&gt;password&lt;/span&gt;&lt;br /&gt;host    all         all         all                   &lt;span style="font-weight: bold;"&gt;password&lt;br /&gt;#all and 0.0.0.0 mean the same - 0.0.0.0 is an ip address meaning any&lt;br /&gt;&lt;/span&gt;# IPv6 local connections:&lt;br /&gt;host    all         all         ::1/128               &lt;span style="font-weight: bold;"&gt;password &lt;/span&gt;&lt;div class="blogger-post-footer"&gt;&lt;img width='1' height='1' 
src='https://blogger.googleusercontent.com/tracker/33306823-8813273510289181479?l=pylab.blogspot.com' alt='' /&gt;&lt;/div&gt;</content><link rel='replies' type='application/atom+xml' href='http://pylab.blogspot.com/feeds/8813273510289181479/comments/default' title='Post Comments'/><link rel='replies' type='text/html' href='http://www.blogger.com/comment.g?blogID=33306823&amp;postID=8813273510289181479' title='0 Comments'/><link rel='edit' type='application/atom+xml' href='http://www.blogger.com/feeds/33306823/posts/default/8813273510289181479'/><link rel='self' type='application/atom+xml' href='http://www.blogger.com/feeds/33306823/posts/default/8813273510289181479'/><link rel='alternate' type='text/html' href='http://pylab.blogspot.com/2007/01/password-securing-postgres.html' title='Password securing postgres'/><author><name>NextGenSearch</name><email>noreply@blogger.com</email><gd:image rel='http://schemas.google.com/g/2005#thumbnail' width='16' height='16' src='http://img2.blogblog.com/img/b16-rounded.gif'/></author><thr:total>0</thr:total></entry><entry><id>tag:blogger.com,1999:blog-33306823.post-8796545828040619065</id><published>2007-01-15T23:31:00.000-08:00</published><updated>2007-01-15T23:32:18.916-08:00</updated><category scheme='http://www.blogger.com/atom/ns#' term='postgres'/><title type='text'>Duplicates in pictures table</title><content type='html'>&lt;span style="font-weight: bold;"&gt;select &lt;/span&gt;&lt;br /&gt;&lt;span style="font-weight: bold;"&gt;p1.id as pix1_id, &lt;/span&gt;&lt;br /&gt;&lt;span style="font-weight: bold;"&gt;p2.id as pix2_id &lt;/span&gt;&lt;br /&gt;&lt;span style="font-weight: bold;"&gt;from &lt;/span&gt;&lt;br /&gt;&lt;span style="font-weight: bold;"&gt;pixpair p1, &lt;/span&gt;&lt;br /&gt;&lt;span style="font-weight: bold;"&gt;pixpair p2 &lt;/span&gt;&lt;br /&gt;&lt;span style="font-weight: bold;"&gt;where &lt;/span&gt;&lt;br /&gt;&lt;span style="font-weight: bold;"&gt;p1.pic1_id = p2.pic2_id and p1.pic2_id = 
p2.pic1_id; &lt;/span&gt;&lt;br /&gt;should show duplicates..&lt;br /&gt;&lt;br /&gt;&lt;span style="font-weight: bold;"&gt;select * from pixpair &lt;/span&gt;&lt;br /&gt;&lt;span style="font-weight: bold;"&gt;                where (pic1_id = _new_pic_id and pic2_id = allpics.id) &lt;/span&gt;&lt;br /&gt;&lt;span style="font-weight: bold;"&gt;                or (pic2_id = _new_pic_id and pic1_id = allpics.id) ) );&lt;/span&gt;&lt;div class="blogger-post-footer"&gt;&lt;img width='1' height='1' src='https://blogger.googleusercontent.com/tracker/33306823-8796545828040619065?l=pylab.blogspot.com' alt='' /&gt;&lt;/div&gt;</content><link rel='replies' type='application/atom+xml' href='http://pylab.blogspot.com/feeds/8796545828040619065/comments/default' title='Post Comments'/><link rel='replies' type='text/html' href='http://www.blogger.com/comment.g?blogID=33306823&amp;postID=8796545828040619065' title='0 Comments'/><link rel='edit' type='application/atom+xml' href='http://www.blogger.com/feeds/33306823/posts/default/8796545828040619065'/><link rel='self' type='application/atom+xml' href='http://www.blogger.com/feeds/33306823/posts/default/8796545828040619065'/><link rel='alternate' type='text/html' href='http://pylab.blogspot.com/2007/01/duplicates-in-pictures-table.html' title='Duplicates in pictures table'/><author><name>NextGenSearch</name><email>noreply@blogger.com</email><gd:image rel='http://schemas.google.com/g/2005#thumbnail' width='16' height='16' src='http://img2.blogblog.com/img/b16-rounded.gif'/></author><thr:total>0</thr:total></entry><entry><id>tag:blogger.com,1999:blog-33306823.post-1527884434246334021</id><published>2007-01-14T03:14:00.001-08:00</published><updated>2007-01-14T03:14:48.412-08:00</updated><category scheme='http://www.blogger.com/atom/ns#' term='cheetah python gotchas'/><title type='text'>Dont forget to check if variable is valid before slicing object</title><content type='html'>#if $cutie.comment&lt;br /&gt;  $cutie.comment[0:100]&lt;br 
/&gt;#end if&lt;br /&gt;&lt;br /&gt; Dont forget to put if exists&lt;br /&gt; else you will get unscriptable object error&lt;div class="blogger-post-footer"&gt;&lt;img width='1' height='1' src='https://blogger.googleusercontent.com/tracker/33306823-1527884434246334021?l=pylab.blogspot.com' alt='' /&gt;&lt;/div&gt;</content><link rel='replies' type='application/atom+xml' href='http://pylab.blogspot.com/feeds/1527884434246334021/comments/default' title='Post Comments'/><link rel='replies' type='text/html' href='http://www.blogger.com/comment.g?blogID=33306823&amp;postID=1527884434246334021' title='0 Comments'/><link rel='edit' type='application/atom+xml' href='http://www.blogger.com/feeds/33306823/posts/default/1527884434246334021'/><link rel='self' type='application/atom+xml' href='http://www.blogger.com/feeds/33306823/posts/default/1527884434246334021'/><link rel='alternate' type='text/html' href='http://pylab.blogspot.com/2007/01/dont-forget-to-check-if-variable-is.html' title='Dont forget to check if variable is valid before slicing object'/><author><name>NextGenSearch</name><email>noreply@blogger.com</email><gd:image rel='http://schemas.google.com/g/2005#thumbnail' width='16' height='16' src='http://img2.blogblog.com/img/b16-rounded.gif'/></author><thr:total>0</thr:total></entry><entry><id>tag:blogger.com,1999:blog-33306823.post-8129753116780719777</id><published>2007-01-09T16:56:00.000-08:00</published><updated>2007-01-09T16:57:00.280-08:00</updated><category scheme='http://www.blogger.com/atom/ns#' term='python'/><title type='text'>Static variables in python</title><content type='html'>&lt;pre&gt;&lt;tt&gt;&lt;big&gt;&lt;tt&gt;&lt;big&gt;ints are not modifiable, so += actually rebinds the variable.  
Try:&lt;br /&gt;&lt;br /&gt;def f():&lt;br /&gt;   global global_var_c_hits&lt;br /&gt;   global_var_c_hits +=1&lt;/big&gt;&lt;/tt&gt;&lt;/big&gt;&lt;/tt&gt;&lt;/pre&gt;&lt;div class="blogger-post-footer"&gt;&lt;img width='1' height='1' src='https://blogger.googleusercontent.com/tracker/33306823-8129753116780719777?l=pylab.blogspot.com' alt='' /&gt;&lt;/div&gt;</content><link rel='replies' type='application/atom+xml' href='http://pylab.blogspot.com/feeds/8129753116780719777/comments/default' title='Post Comments'/><link rel='replies' type='text/html' href='http://www.blogger.com/comment.g?blogID=33306823&amp;postID=8129753116780719777' title='0 Comments'/><link rel='edit' type='application/atom+xml' href='http://www.blogger.com/feeds/33306823/posts/default/8129753116780719777'/><link rel='self' type='application/atom+xml' href='http://www.blogger.com/feeds/33306823/posts/default/8129753116780719777'/><link rel='alternate' type='text/html' href='http://pylab.blogspot.com/2007/01/static-variables-in-python.html' title='Static variables in python'/><author><name>NextGenSearch</name><email>noreply@blogger.com</email><gd:image rel='http://schemas.google.com/g/2005#thumbnail' width='16' height='16' src='http://img2.blogblog.com/img/b16-rounded.gif'/></author><thr:total>0</thr:total></entry><entry><id>tag:blogger.com,1999:blog-33306823.post-5355327316578471127</id><published>2007-01-08T18:42:00.001-08:00</published><updated>2007-01-08T18:43:10.709-08:00</updated><category scheme='http://www.blogger.com/atom/ns#' term='cron'/><title type='text'>Cron jobs need quiet operation</title><content type='html'>Cron jobs need quiet operation; if a command generates output, you’ll get an email from cron with the command output. 
So if you want to fetch a file silently with wget or curl, use a command like this:&lt;br /&gt;&lt;br /&gt;curl --silent --output output_filename http://go.com/urltofetch.html&lt;br /&gt;&lt;br /&gt;wget --quiet --output-document output_filename http://google.com/urltofetch.html&lt;br /&gt;&lt;br /&gt;There are shorter versions of these options, but using the verbose options will make code or cron jobs easier to understand if you come back to them. Be aware that urls with “&amp;amp;” in them can confuse wget at least, so depending on your shell (bash, csh, tcsh), you may need to put single or double quotes around the url.&lt;div class="blogger-post-footer"&gt;&lt;img width='1' height='1' src='https://blogger.googleusercontent.com/tracker/33306823-5355327316578471127?l=pylab.blogspot.com' alt='' /&gt;&lt;/div&gt;</content><link rel='replies' type='application/atom+xml' href='http://pylab.blogspot.com/feeds/5355327316578471127/comments/default' title='Post Comments'/><link rel='replies' type='text/html' href='http://www.blogger.com/comment.g?blogID=33306823&amp;postID=5355327316578471127' title='0 Comments'/><link rel='edit' type='application/atom+xml' href='http://www.blogger.com/feeds/33306823/posts/default/5355327316578471127'/><link rel='self' type='application/atom+xml' href='http://www.blogger.com/feeds/33306823/posts/default/5355327316578471127'/><link rel='alternate' type='text/html' href='http://pylab.blogspot.com/2007/01/cron-jobs-need-quiet-operation.html' title='Cron jobs need quiet operation'/><author><name>NextGenSearch</name><email>noreply@blogger.com</email><gd:image rel='http://schemas.google.com/g/2005#thumbnail' width='16' height='16' src='http://img2.blogblog.com/img/b16-rounded.gif'/></author><thr:total>0</thr:total></entry><entry><id>tag:blogger.com,1999:blog-33306823.post-7522740066314437535</id><published>2007-01-06T11:52:00.000-08:00</published><updated>2007-01-06T11:53:38.446-08:00</updated><title type='text'>Threaded data collection with 
Python</title><content type='html'>&lt;table class="entryhead" width="100%"&gt;&lt;tbody&gt;&lt;tr&gt;&lt;td class="title"&gt;&lt;h2&gt;&lt;a href="http://www.davidnaylor.co.uk/archives/2006/10/19/threaded-data-collection-with-python-including-examples/" rel="bookmark" title="Permanent Link to Threaded data collection with Python, including examples"&gt;Threaded data collection with Python, including examples&lt;/a&gt;&lt;/h2&gt;       &lt;p class="postmetadata"&gt;Posted in &lt;a href="http://www.davidnaylor.co.uk/archives/category/general/" title="View all posts in General" rel="category tag"&gt;General&lt;/a&gt; &lt;/p&gt; &lt;/td&gt;       &lt;td class="posttime" align="center" width="60"&gt;&lt;span&gt;October&lt;/span&gt; &lt;span class="day"&gt;19&lt;/span&gt; &lt;span&gt;2006&lt;/span&gt;&lt;!-- by Rob Haswell --&gt;&lt;/td&gt;      &lt;/tr&gt;     &lt;/tbody&gt;&lt;/table&gt;                  &lt;p&gt;On today's Internet 2.0 there are all sorts of data feeds available for consumption. From APIs to RSS feeds, it seems like nearly every site has a machine-readable output. There are many reasons why you'd want to collect this information, which I won't go in to, so in this post I'm going to walk you through an application which consumes RSS feeds. I'll be using the &lt;a href="http://www.python.org/"&gt;Python&lt;/a&gt; scripting language, and I'll show you an evolution of the ways to go about the task:&lt;/p&gt; &lt;h2&gt;Application introduction&lt;/h2&gt; &lt;p&gt;Our application is going to work like this:&lt;/p&gt; &lt;ul&gt;&lt;li&gt;A database contains the list of RSS feeds. 
This is long - 1000+ records&lt;/li&gt;&lt;li&gt;Our application reads this list of feeds and processes them&lt;/li&gt;&lt;li&gt;The items from the feeds are stored in the database&lt;/li&gt;&lt;/ul&gt; &lt;p&gt;Database manipulation and RSS feed parsing are outside the scope of this tutorial, so we'll start off by defining some empty functions that handle all this:&lt;/p&gt; &lt;div class="igBar"&gt;&lt;span id="lpython-1"&gt;&lt;a href="http://www.davidnaylor.co.uk/archives/2006/10/19/threaded-data-collection-with-python-including-examples/#" onclick="javascript:showPlainTxt('python-1'); return false;"&gt;PLAIN TEXT&lt;/a&gt;&lt;/span&gt;&lt;/div&gt; &lt;div class="syntax_hilite"&gt;&lt;span class="langName"&gt;PYTHON:&lt;/span&gt;&lt;br /&gt;&lt;div id="python-1"&gt; &lt;div class="python"&gt; &lt;ol&gt;&lt;li style="font-family: 'Courier New',Courier,monospace; font-weight: normal; font-style: normal; color: rgb(58, 106, 139);"&gt; &lt;div style="font-family: 'Courier New',Courier,monospace; font-weight: normal;"&gt;&lt;span style="color: rgb(255, 119, 0); font-weight: bold;"&gt;def&lt;/span&gt; get_feed_list&lt;span style="color: black;"&gt;(&lt;/span&gt;&lt;span style="color: black;"&gt;)&lt;/span&gt;:&lt;/div&gt; &lt;/li&gt;&lt;li style="font-weight: bold; color: rgb(38, 83, 106);"&gt; &lt;div style="font-family: 'Courier New',Courier,monospace; font-weight: normal;"&gt;    &lt;span style="color: rgb(72, 61, 139);"&gt;""&lt;/span&gt;&lt;span style="color: rgb(72, 61, 139);"&gt;" Returns a list of tuples: (id, feed_url) "&lt;/span&gt;&lt;span style="color: rgb(72, 61, 139);"&gt;""&lt;/span&gt;&lt;/div&gt; &lt;/li&gt;&lt;li style="font-family: 'Courier New',Courier,monospace; font-weight: normal; font-style: normal; color: rgb(58, 106, 139);"&gt; &lt;div style="font-family: 'Courier New',Courier,monospace; font-weight: normal;"&gt;    &lt;span style="color: rgb(255, 119, 0); font-weight: bold;"&gt;pass&lt;/span&gt;&lt;/div&gt; &lt;/li&gt;&lt;li 
style="font-weight: bold; color: rgb(38, 83, 106);"&gt; &lt;div style="font-family: 'Courier New',Courier,monospace; font-weight: normal;"&gt;&lt;span style="color: rgb(255, 119, 0); font-weight: bold;"&gt;def&lt;/span&gt; get_feed_contents&lt;span style="color: black;"&gt;(&lt;/span&gt;feed_url&lt;span style="color: black;"&gt;)&lt;/span&gt;:&lt;/div&gt; &lt;/li&gt;&lt;li style="font-family: 'Courier New',Courier,monospace; font-weight: normal; font-style: normal; color: rgb(58, 106, 139);"&gt; &lt;div style="font-family: 'Courier New',Courier,monospace; font-weight: normal;"&gt;    &lt;span style="color: rgb(72, 61, 139);"&gt;""&lt;/span&gt;&lt;span style="color: rgb(72, 61, 139);"&gt;" Gets feed over HTTP, returns RSS XML "&lt;/span&gt;&lt;span style="color: rgb(72, 61, 139);"&gt;""&lt;/span&gt;&lt;/div&gt; &lt;/li&gt;&lt;li style="font-weight: bold; color: rgb(38, 83, 106);"&gt; &lt;div style="font-family: 'Courier New',Courier,monospace; font-weight: normal;"&gt;    &lt;span style="color: rgb(255, 119, 0); font-weight: bold;"&gt;pass&lt;/span&gt;&lt;/div&gt; &lt;/li&gt;&lt;li style="font-family: 'Courier New',Courier,monospace; font-weight: normal; font-style: normal; color: rgb(58, 106, 139);"&gt; &lt;div style="font-family: 'Courier New',Courier,monospace; font-weight: normal;"&gt;&lt;span style="color: rgb(255, 119, 0); font-weight: bold;"&gt;def&lt;/span&gt; parse_feed&lt;span style="color: black;"&gt;(&lt;/span&gt;feed_rss&lt;span style="color: black;"&gt;)&lt;/span&gt;:&lt;/div&gt; &lt;/li&gt;&lt;li style="font-weight: bold; color: rgb(38, 83, 106);"&gt; &lt;div style="font-family: 'Courier New',Courier,monospace; font-weight: normal;"&gt;    &lt;span style="color: rgb(72, 61, 139);"&gt;""&lt;/span&gt;&lt;span style="color: rgb(72, 61, 139);"&gt;" Parses the feed and returns a list of items "&lt;/span&gt;&lt;span style="color: rgb(72, 61, 139);"&gt;""&lt;/span&gt;&lt;/div&gt; &lt;/li&gt;&lt;li style="font-family: 'Courier New',Courier,monospace; 
font-weight: normal; font-style: normal; color: rgb(58, 106, 139);"&gt; &lt;div style="font-family: 'Courier New',Courier,monospace; font-weight: normal;"&gt;    &lt;span style="color: rgb(255, 119, 0); font-weight: bold;"&gt;pass&lt;/span&gt;&lt;/div&gt; &lt;/li&gt;&lt;li style="font-weight: bold; color: rgb(38, 83, 106);"&gt; &lt;div style="font-family: 'Courier New',Courier,monospace; font-weight: normal;"&gt;&lt;span style="color: rgb(255, 119, 0); font-weight: bold;"&gt;def&lt;/span&gt; store_feed_items&lt;span style="color: black;"&gt;(&lt;/span&gt;&lt;span style="color: rgb(0, 128, 0);"&gt;id&lt;/span&gt;, items&lt;span style="color: black;"&gt;)&lt;/span&gt;:&lt;/div&gt; &lt;/li&gt;&lt;li style="font-family: 'Courier New',Courier,monospace; font-weight: normal; font-style: normal; color: rgb(58, 106, 139);"&gt; &lt;div style="font-family: 'Courier New',Courier,monospace; font-weight: normal;"&gt;    &lt;span style="color: rgb(72, 61, 139);"&gt;""&lt;/span&gt;&lt;span style="color: rgb(72, 61, 139);"&gt;" Takes a feed_id and a list of items and stored them in the DB "&lt;/span&gt;&lt;span style="color: rgb(72, 61, 139);"&gt;""&lt;/span&gt;&lt;/div&gt; &lt;/li&gt;&lt;li style="font-weight: bold; color: rgb(38, 83, 106);"&gt; &lt;div style="font-family: 'Courier New',Courier,monospace; font-weight: normal;"&gt;    &lt;span style="color: rgb(255, 119, 0); font-weight: bold;"&gt;pass&lt;/span&gt; &lt;/div&gt; &lt;/li&gt;&lt;/ol&gt; &lt;/div&gt; &lt;/div&gt; &lt;/div&gt; &lt;p&gt; We're going to have all these in a module called "functions", which can just be a file called functions.py in the same directory ( &lt; python3.0)&lt;br /&gt;&lt;/p&gt; &lt;h2&gt;Implementation 1: Single-threaded&lt;/h2&gt; &lt;p&gt;This is the way most people would do it at first. 
So simple, I'll just post the sample code:&lt;/p&gt; &lt;div class="igBar"&gt;&lt;span id="lpython-2"&gt;&lt;a href="http://www.davidnaylor.co.uk/archives/2006/10/19/threaded-data-collection-with-python-including-examples/#" onclick="javascript:showPlainTxt('python-2'); return false;"&gt;PLAIN TEXT&lt;/a&gt;&lt;/span&gt;&lt;/div&gt; &lt;div class="syntax_hilite"&gt;&lt;span class="langName"&gt;PYTHON:&lt;/span&gt;&lt;br /&gt;&lt;div id="python-2"&gt; &lt;div class="python"&gt; &lt;ol&gt;&lt;li style="font-family: 'Courier New',Courier,monospace; font-weight: normal; font-style: normal; color: rgb(58, 106, 139);"&gt; &lt;div style="font-family: 'Courier New',Courier,monospace; font-weight: normal;"&gt;&lt;span style="color: rgb(255, 119, 0); font-weight: bold;"&gt;import&lt;/span&gt; functions&lt;/div&gt; &lt;/li&gt;&lt;li style="font-weight: bold; color: rgb(38, 83, 106);"&gt; &lt;div style="font-family: 'Courier New',Courier,monospace; font-weight: normal;"&gt;&lt;span style="color: rgb(255, 119, 0); font-weight: bold;"&gt;for&lt;/span&gt; &lt;span style="color: rgb(0, 128, 0);"&gt;id&lt;/span&gt;, feed_url &lt;span style="color: rgb(255, 119, 0); font-weight: bold;"&gt;in&lt;/span&gt; get_feed_list&lt;span style="color: black;"&gt;(&lt;/span&gt;&lt;span style="color: black;"&gt;)&lt;/span&gt;:&lt;/div&gt; &lt;/li&gt;&lt;li style="font-family: 'Courier New',Courier,monospace; font-weight: normal; font-style: normal; color: rgb(58, 106, 139);"&gt; &lt;div style="font-family: 'Courier New',Courier,monospace; font-weight: normal;"&gt;    rss = functions.&lt;span style="color: black;"&gt;get_feed_contents&lt;/span&gt;&lt;span style="color: black;"&gt;(&lt;/span&gt;feed_url&lt;span style="color: black;"&gt;)&lt;/span&gt;&lt;/div&gt; &lt;/li&gt;&lt;li style="font-weight: bold; color: rgb(38, 83, 106);"&gt; &lt;div style="font-family: 'Courier New',Courier,monospace; font-weight: normal;"&gt;    items = functions.&lt;span style="color: 
black;"&gt;parse_feed&lt;/span&gt;&lt;span style="color: black;"&gt;(&lt;/span&gt;rss&lt;span style="color: black;"&gt;)&lt;/span&gt;&lt;/div&gt; &lt;/li&gt;&lt;li style="font-family: 'Courier New',Courier,monospace; font-weight: normal; font-style: normal; color: rgb(58, 106, 139);"&gt; &lt;div style="font-family: 'Courier New',Courier,monospace; font-weight: normal;"&gt;    functions.&lt;span style="color: black;"&gt;store_feed_items&lt;/span&gt;&lt;span style="color: black;"&gt;(&lt;/span&gt;&lt;span style="color: rgb(0, 128, 0);"&gt;id&lt;/span&gt;, items&lt;span style="color: black;"&gt;)&lt;/span&gt; &lt;/div&gt; &lt;/li&gt;&lt;/ol&gt; &lt;/div&gt; &lt;/div&gt; &lt;/div&gt; &lt;p&gt; Pretty simple huh? But there are fundamental problems. Feeds are usually slow, meaning that your program will spend a lot of time waiting for feeds to come in before you can parse them. Your program will also be spending time parsing feeds when it could be getting them from the internet as well. Consequently this program will be as slow as molasses. It's like eating a bowl of peas one at a time - you'd rather just shovel them in wouldn't you? Enter: threading.&lt;/p&gt; &lt;h2&gt;Implementation 2&lt;/h2&gt; &lt;p&gt;So we reckon: "If we use threads, this will make things faster?" Answer: Yes. However, there are quite a few ways of doing this. 
We'll start off with this:&lt;/p&gt; &lt;div class="igBar"&gt;&lt;span id="lpython-3"&gt;&lt;a href="http://www.davidnaylor.co.uk/archives/2006/10/19/threaded-data-collection-with-python-including-examples/#" onclick="javascript:showPlainTxt('python-3'); return false;"&gt;PLAIN TEXT&lt;/a&gt;&lt;/span&gt;&lt;/div&gt; &lt;div class="syntax_hilite"&gt;&lt;span class="langName"&gt;PYTHON:&lt;/span&gt;&lt;br /&gt;&lt;div id="python-3"&gt; &lt;div class="python"&gt; &lt;ol&gt;&lt;li style="font-family: 'Courier New',Courier,monospace; font-weight: normal; font-style: normal; color: rgb(58, 106, 139);"&gt; &lt;div style="font-family: 'Courier New',Courier,monospace; font-weight: normal;"&gt;&lt;span style="color: rgb(255, 119, 0); font-weight: bold;"&gt;import&lt;/span&gt; &lt;span style="color: rgb(220, 20, 60);"&gt;threading&lt;/span&gt;, functions, &lt;span style="color: rgb(220, 20, 60);"&gt;time&lt;/span&gt;&lt;/div&gt; &lt;/li&gt;&lt;li style="font-weight: bold; color: rgb(38, 83, 106);"&gt; &lt;div style="font-family: 'Courier New',Courier,monospace; font-weight: normal;"&gt;&lt;span style="color: rgb(255, 119, 0); font-weight: bold;"&gt;def&lt;/span&gt; &lt;span style="color: rgb(220, 20, 60);"&gt;thread&lt;/span&gt;&lt;span style="color: black;"&gt;(&lt;/span&gt;&lt;span style="color: rgb(0, 128, 0);"&gt;id&lt;/span&gt;, feed_url&lt;span style="color: black;"&gt;)&lt;/span&gt;:&lt;/div&gt; &lt;/li&gt;&lt;li style="font-family: 'Courier New',Courier,monospace; font-weight: normal; font-style: normal; color: rgb(58, 106, 139);"&gt; &lt;div style="font-family: 'Courier New',Courier,monospace; font-weight: normal;"&gt;    rss = functions.&lt;span style="color: black;"&gt;get_feed_contents&lt;/span&gt;&lt;span style="color: black;"&gt;(&lt;/span&gt;feed_url&lt;span style="color: black;"&gt;)&lt;/span&gt;&lt;/div&gt; &lt;/li&gt;&lt;li style="font-weight: bold; color: rgb(38, 83, 106);"&gt; &lt;div style="font-family: 'Courier New',Courier,monospace; font-weight: 
normal;"&gt;    items = functions.&lt;span style="color: black;"&gt;parse_feed&lt;/span&gt;&lt;span style="color: black;"&gt;(&lt;/span&gt;rss&lt;span style="color: black;"&gt;)&lt;/span&gt;&lt;/div&gt; &lt;/li&gt;&lt;li style="font-family: 'Courier New',Courier,monospace; font-weight: normal; font-style: normal; color: rgb(58, 106, 139);"&gt; &lt;div style="font-family: 'Courier New',Courier,monospace; font-weight: normal;"&gt;    functions.&lt;span style="color: black;"&gt;store_feed_items&lt;/span&gt;&lt;span style="color: black;"&gt;(&lt;/span&gt;&lt;span style="color: rgb(0, 128, 0);"&gt;id&lt;/span&gt;, items&lt;span style="color: black;"&gt;)&lt;/span&gt;&lt;/div&gt; &lt;/li&gt;&lt;li style="font-weight: bold; color: rgb(38, 83, 106);"&gt; &lt;div style="font-family: 'Courier New',Courier,monospace; font-weight: normal;"&gt; &lt;/div&gt; &lt;/li&gt;&lt;li style="font-family: 'Courier New',Courier,monospace; font-weight: normal; font-style: normal; color: rgb(58, 106, 139);"&gt; &lt;div style="font-family: 'Courier New',Courier,monospace; font-weight: normal;"&gt;&lt;span style="color: rgb(255, 119, 0); font-weight: bold;"&gt;for&lt;/span&gt; &lt;span style="color: rgb(0, 128, 0);"&gt;id&lt;/span&gt;, feed_url &lt;span style="color: rgb(255, 119, 0); font-weight: bold;"&gt;in&lt;/span&gt; get_feed_list&lt;span style="color: black;"&gt;(&lt;/span&gt;&lt;span style="color: black;"&gt;)&lt;/span&gt;:&lt;/div&gt; &lt;/li&gt;&lt;li style="font-weight: bold; color: rgb(38, 83, 106);"&gt; &lt;div style="font-family: 'Courier New',Courier,monospace; font-weight: normal;"&gt;    t = &lt;span style="color: rgb(220, 20, 60);"&gt;threading&lt;/span&gt;.&lt;span style="color: rgb(220, 20, 60);"&gt;Thread&lt;/span&gt;&lt;span style="color: black;"&gt;(&lt;/span&gt;target=&lt;span style="color: rgb(220, 20, 60);"&gt;thread&lt;/span&gt;, kwargs=&lt;span style="color: black;"&gt;{&lt;/span&gt;&lt;span style="color: rgb(72, 61, 139);"&gt;"id"&lt;/span&gt;:&lt;span 
style="color: rgb(0, 128, 0);"&gt;id&lt;/span&gt;, &lt;span style="color: rgb(72, 61, 139);"&gt;"feed_url"&lt;/span&gt;:feed_url&lt;span style="color: black;"&gt;}&lt;/span&gt;&lt;span style="color: black;"&gt;)&lt;/span&gt;&lt;/div&gt; &lt;/li&gt;&lt;li style="font-family: 'Courier New',Courier,monospace; font-weight: normal; font-style: normal; color: rgb(58, 106, 139);"&gt; &lt;div style="font-family: 'Courier New',Courier,monospace; font-weight: normal;"&gt;    t.&lt;span style="color: black;"&gt;start&lt;/span&gt;&lt;span style="color: black;"&gt;(&lt;/span&gt;&lt;span style="color: black;"&gt;)&lt;/span&gt;&lt;/div&gt; &lt;/li&gt;&lt;li style="font-weight: bold; color: rgb(38, 83, 106);"&gt; &lt;div style="font-family: 'Courier New',Courier,monospace; font-weight: normal;"&gt;&lt;span style="color: rgb(255, 119, 0); font-weight: bold;"&gt;while&lt;/span&gt; &lt;span style="color: rgb(220, 20, 60);"&gt;threading&lt;/span&gt;.&lt;span style="color: black;"&gt;activeCount&lt;/span&gt;&lt;span style="color: black;"&gt;(&lt;/span&gt;&lt;span style="color: black;"&gt;)&lt;/span&gt;&gt; &lt;span style="color: rgb(128, 0, 0);"&gt;1&lt;/span&gt;: &lt;span style="color: rgb(220, 20, 60);"&gt;time&lt;/span&gt;.&lt;span style="color: black;"&gt;sleep&lt;/span&gt;&lt;span style="color: black;"&gt;(&lt;/span&gt;&lt;span style="color: rgb(128, 0, 0);"&gt;1&lt;/span&gt;&lt;span style="color: black;"&gt;)&lt;/span&gt; &lt;/div&gt; &lt;/li&gt;&lt;/ol&gt; &lt;/div&gt; &lt;/div&gt; &lt;/div&gt; &lt;p&gt; &lt;strong&gt;Problem:&lt;/strong&gt; This is just going to create as many threads as there are feed items immediately and then wait for them to finish. This has the following issues:&lt;/p&gt; &lt;ul&gt;&lt;li&gt;If you try to request 1000+ pages at a time, many of them will time out. 
If a percentage of these are on the same server, you'll &lt;a href="http://en.wikipedia.org/wiki/Denial-of-service_attack"&gt;DoS&lt;/a&gt; it.&lt;/li&gt;&lt;li&gt;With 1000 threads your app will likely either run out of memory or get so bogged down in context switching it'll take forever&lt;/li&gt;&lt;li&gt;Try this on any consumer-grade router and it'll probably crash&lt;/li&gt;&lt;/ul&gt; &lt;p&gt;So what do we do? Well, let's set a limit on the number of concurrent threads:&lt;/p&gt; &lt;div class="igBar"&gt;&lt;span id="lpython-4"&gt;&lt;a href="http://www.davidnaylor.co.uk/archives/2006/10/19/threaded-data-collection-with-python-including-examples/#" onclick="javascript:showPlainTxt('python-4'); return false;"&gt;PLAIN TEXT&lt;/a&gt;&lt;/span&gt;&lt;/div&gt; &lt;div class="syntax_hilite"&gt;&lt;span class="langName"&gt;PYTHON:&lt;/span&gt;&lt;br /&gt;&lt;div id="python-4"&gt; &lt;div class="python"&gt; &lt;ol&gt;&lt;li style="font-family: 'Courier New',Courier,monospace; font-weight: normal; font-style: normal; color: rgb(58, 106, 139);"&gt; &lt;div style="font-family: 'Courier New',Courier,monospace; font-weight: normal;"&gt;&lt;span style="color: rgb(255, 119, 0); font-weight: bold;"&gt;import&lt;/span&gt; &lt;span style="color: rgb(220, 20, 60);"&gt;threading&lt;/span&gt;, functions, &lt;span style="color: rgb(220, 20, 60);"&gt;time&lt;/span&gt;&lt;/div&gt; &lt;/li&gt;&lt;li style="font-weight: bold; color: rgb(38, 83, 106);"&gt; &lt;div style="font-family: 'Courier New',Courier,monospace; font-weight: normal;"&gt;THREAD_LIMIT = &lt;span style="color: rgb(128, 0, 0);"&gt;20&lt;/span&gt;&lt;/div&gt; &lt;/li&gt;&lt;li style="font-family: 'Courier New',Courier,monospace; font-weight: normal; font-style: normal; color: rgb(58, 106, 139);"&gt; &lt;div style="font-family: 'Courier New',Courier,monospace; font-weight: normal;"&gt;&lt;span style="color: rgb(255, 119, 0); font-weight: bold;"&gt;def&lt;/span&gt; &lt;span style="color: rgb(220, 20, 
60);"&gt;thread&lt;/span&gt;&lt;span style="color: black;"&gt;(&lt;/span&gt;&lt;span style="color: rgb(0, 128, 0);"&gt;id&lt;/span&gt;, feed_url&lt;span style="color: black;"&gt;)&lt;/span&gt;:&lt;/div&gt; &lt;/li&gt;&lt;li style="font-weight: bold; color: rgb(38, 83, 106);"&gt; &lt;div style="font-family: 'Courier New',Courier,monospace; font-weight: normal;"&gt;    rss = functions.&lt;span style="color: black;"&gt;get_feed_contents&lt;/span&gt;&lt;span style="color: black;"&gt;(&lt;/span&gt;feed_url&lt;span style="color: black;"&gt;)&lt;/span&gt;&lt;/div&gt; &lt;/li&gt;&lt;li style="font-family: 'Courier New',Courier,monospace; font-weight: normal; font-style: normal; color: rgb(58, 106, 139);"&gt; &lt;div style="font-family: 'Courier New',Courier,monospace; font-weight: normal;"&gt;    items = functions.&lt;span style="color: black;"&gt;parse_feed&lt;/span&gt;&lt;span style="color: black;"&gt;(&lt;/span&gt;rss&lt;span style="color: black;"&gt;)&lt;/span&gt;&lt;/div&gt; &lt;/li&gt;&lt;li style="font-weight: bold; color: rgb(38, 83, 106);"&gt; &lt;div style="font-family: 'Courier New',Courier,monospace; font-weight: normal;"&gt;    functions.&lt;span style="color: black;"&gt;store_feed_items&lt;/span&gt;&lt;span style="color: black;"&gt;(&lt;/span&gt;&lt;span style="color: rgb(0, 128, 0);"&gt;id&lt;/span&gt;, items&lt;span style="color: black;"&gt;)&lt;/span&gt;&lt;/div&gt; &lt;/li&gt;&lt;li style="font-family: 'Courier New',Courier,monospace; font-weight: normal; font-style: normal; color: rgb(58, 106, 139);"&gt; &lt;div style="font-family: 'Courier New',Courier,monospace; font-weight: normal;"&gt; &lt;/div&gt; &lt;/li&gt;&lt;li style="font-weight: bold; color: rgb(38, 83, 106);"&gt; &lt;div style="font-family: 'Courier New',Courier,monospace; font-weight: normal;"&gt;&lt;span style="color: rgb(255, 119, 0); font-weight: bold;"&gt;for&lt;/span&gt; &lt;span style="color: rgb(0, 128, 0);"&gt;id&lt;/span&gt;, feed_url &lt;span style="color: rgb(255, 119, 0); 
font-weight: bold;"&gt;in&lt;/span&gt; get_feed_list&lt;span style="color: black;"&gt;(&lt;/span&gt;&lt;span style="color: black;"&gt;)&lt;/span&gt;:&lt;/div&gt; &lt;/li&gt;&lt;li style="font-family: 'Courier New',Courier,monospace; font-weight: normal; font-style: normal; color: rgb(58, 106, 139);"&gt; &lt;div style="font-family: 'Courier New',Courier,monospace; font-weight: normal;"&gt;    &lt;span style="color: rgb(255, 119, 0); font-weight: bold;"&gt;while&lt;/span&gt; &lt;span style="color: rgb(220, 20, 60);"&gt;threading&lt;/span&gt;.&lt;span style="color: black;"&gt;activeCount&lt;/span&gt;&lt;span style="color: black;"&gt;(&lt;/span&gt;&lt;span style="color: black;"&gt;)&lt;/span&gt;&gt; THREAD_LIMIT:&lt;/div&gt; &lt;/li&gt;&lt;li style="font-weight: bold; color: rgb(38, 83, 106);"&gt; &lt;div style="font-family: 'Courier New',Courier,monospace; font-weight: normal;"&gt;        &lt;span style="color: rgb(220, 20, 60);"&gt;time&lt;/span&gt;.&lt;span style="color: black;"&gt;sleep&lt;/span&gt;&lt;span style="color: black;"&gt;(&lt;/span&gt;&lt;span style="color: rgb(128, 0, 0);"&gt;1&lt;/span&gt;&lt;span style="color: black;"&gt;)&lt;/span&gt;&lt;/div&gt; &lt;/li&gt;&lt;li style="font-family: 'Courier New',Courier,monospace; font-weight: normal; font-style: normal; color: rgb(58, 106, 139);"&gt; &lt;div style="font-family: 'Courier New',Courier,monospace; font-weight: normal;"&gt;    t = &lt;span style="color: rgb(220, 20, 60);"&gt;threading&lt;/span&gt;.&lt;span style="color: rgb(220, 20, 60);"&gt;Thread&lt;/span&gt;&lt;span style="color: black;"&gt;(&lt;/span&gt;target=&lt;span style="color: rgb(220, 20, 60);"&gt;thread&lt;/span&gt;, kwargs=&lt;span style="color: black;"&gt;{&lt;/span&gt;&lt;span style="color: rgb(72, 61, 139);"&gt;"id"&lt;/span&gt;:&lt;span style="color: rgb(0, 128, 0);"&gt;id&lt;/span&gt;, &lt;span style="color: rgb(72, 61, 139);"&gt;"feed_url"&lt;/span&gt;:feed_url&lt;span style="color: black;"&gt;}&lt;/span&gt;&lt;span style="color: 
black;"&gt;)&lt;/span&gt;&lt;/div&gt; &lt;/li&gt;&lt;li style="font-weight: bold; color: rgb(38, 83, 106);"&gt; &lt;div style="font-family: 'Courier New',Courier,monospace; font-weight: normal;"&gt;    t.&lt;span style="color: black;"&gt;start&lt;/span&gt;&lt;span style="color: black;"&gt;(&lt;/span&gt;&lt;span style="color: black;"&gt;)&lt;/span&gt;&lt;/div&gt; &lt;/li&gt;&lt;li style="font-family: 'Courier New',Courier,monospace; font-weight: normal; font-style: normal; color: rgb(58, 106, 139);"&gt; &lt;div style="font-family: 'Courier New',Courier,monospace; font-weight: normal;"&gt;&lt;span style="color: rgb(255, 119, 0); font-weight: bold;"&gt;while&lt;/span&gt; &lt;span style="color: rgb(220, 20, 60);"&gt;threading&lt;/span&gt;.&lt;span style="color: black;"&gt;activeCount&lt;/span&gt;&lt;span style="color: black;"&gt;(&lt;/span&gt;&lt;span style="color: black;"&gt;)&lt;/span&gt;&gt; &lt;span style="color: rgb(128, 0, 0);"&gt;1&lt;/span&gt;: &lt;span style="color: rgb(220, 20, 60);"&gt;time&lt;/span&gt;.&lt;span style="color: black;"&gt;sleep&lt;/span&gt;&lt;span style="color: black;"&gt;(&lt;/span&gt;&lt;span style="color: rgb(128, 0, 0);"&gt;1&lt;/span&gt;&lt;span style="color: black;"&gt;)&lt;/span&gt; &lt;/div&gt; &lt;/li&gt;&lt;/ol&gt; &lt;/div&gt; &lt;/div&gt; &lt;/div&gt; &lt;p&gt; Spot the difference? We have another &lt;code&gt;while&lt;/code&gt; loop right in the &lt;code&gt;for&lt;/code&gt; loop. This is going to make our main thread code hang there while there are other threads still running.&lt;/p&gt; &lt;p&gt;There's another problem though, and that's with the model. In this mode, we're continually creating new threads that live for a short time, then exit. This isn't efficient. It would be much better to create a pool of threads which we can then re-use. 
Let's kick this up a notch.&lt;/p&gt; &lt;h2&gt;Implementation 3: Thread pool, and a Queue&lt;/h2&gt; &lt;p&gt;So in this version we're going to do a few new things:&lt;/p&gt; &lt;ol&gt;&lt;li&gt;Use a &lt;code&gt;Queue&lt;/code&gt; object and populate it with the list of urls.&lt;/li&gt;&lt;li&gt;Spawn a number of threads that will read items off this Queue.&lt;/li&gt;&lt;li&gt;The threads will process the data and store it.&lt;/li&gt;&lt;/ol&gt; &lt;p&gt;I'll start off with the sample code then walk you through it:&lt;/p&gt; &lt;div class="igBar"&gt;&lt;span id="lpython-5"&gt;&lt;a href="http://www.davidnaylor.co.uk/archives/2006/10/19/threaded-data-collection-with-python-including-examples/#" onclick="javascript:showPlainTxt('python-5'); return false;"&gt;PLAIN TEXT&lt;/a&gt;&lt;/span&gt;&lt;/div&gt; &lt;div class="syntax_hilite"&gt;&lt;span class="langName"&gt;PYTHON:&lt;/span&gt;&lt;br /&gt;&lt;div id="python-5"&gt; &lt;div class="python"&gt; &lt;ol&gt;&lt;li style="font-family: 'Courier New',Courier,monospace; font-weight: normal; font-style: normal; color: rgb(58, 106, 139);"&gt; &lt;div style="font-family: 'Courier New',Courier,monospace; font-weight: normal;"&gt;&lt;span style="color: rgb(255, 119, 0); font-weight: bold;"&gt;import&lt;/span&gt; &lt;span style="color: rgb(220, 20, 60);"&gt;threading&lt;/span&gt;, functions, &lt;span style="color: rgb(220, 20, 60);"&gt;time&lt;/span&gt;, &lt;span style="color: rgb(220, 20, 60);"&gt;Queue&lt;/span&gt;&lt;/div&gt; &lt;/li&gt;&lt;li style="font-weight: bold; color: rgb(38, 83, 106);"&gt; &lt;div style="font-family: 'Courier New',Courier,monospace; font-weight: normal;"&gt;THREAD_LIMIT = &lt;span style="color: rgb(128, 0, 0);"&gt;50&lt;/span&gt;&lt;/div&gt; &lt;/li&gt;&lt;li style="font-family: 'Courier New',Courier,monospace; font-weight: normal; font-style: normal; color: rgb(58, 106, 139);"&gt; &lt;div style="font-family: 'Courier New',Courier,monospace; font-weight: normal;"&gt;jobs = &lt;span style="color: 
rgb(220, 20, 60);"&gt;Queue&lt;/span&gt;.&lt;span style="color: rgb(220, 20, 60);"&gt;Queue&lt;/span&gt;&lt;span style="color: black;"&gt;(&lt;/span&gt;&lt;span style="color: rgb(128, 0, 0);"&gt;0&lt;/span&gt;&lt;span style="color: black;"&gt;)&lt;/span&gt; &lt;span style="color: rgb(128, 128, 128); font-style: italic;"&gt;# arg1 means "No item limit"&lt;/span&gt;&lt;/div&gt; &lt;/li&gt;&lt;li style="font-weight: bold; color: rgb(38, 83, 106);"&gt; &lt;div style="font-family: 'Courier New',Courier,monospace; font-weight: normal;"&gt; &lt;/div&gt; &lt;/li&gt;&lt;li style="font-family: 'Courier New',Courier,monospace; font-weight: normal; font-style: normal; color: rgb(58, 106, 139);"&gt; &lt;div style="font-family: 'Courier New',Courier,monospace; font-weight: normal;"&gt;&lt;span style="color: rgb(255, 119, 0); font-weight: bold;"&gt;def&lt;/span&gt; &lt;span style="color: rgb(220, 20, 60);"&gt;thread&lt;/span&gt;&lt;span style="color: black;"&gt;(&lt;/span&gt;&lt;span style="color: black;"&gt;)&lt;/span&gt;:&lt;/div&gt; &lt;/li&gt;&lt;li style="font-weight: bold; color: rgb(38, 83, 106);"&gt; &lt;div style="font-family: 'Courier New',Courier,monospace; font-weight: normal;"&gt;    &lt;span style="color: rgb(255, 119, 0); font-weight: bold;"&gt;while&lt;/span&gt; &lt;span style="color: rgb(0, 128, 0);"&gt;True&lt;/span&gt;: &lt;span style="color: rgb(128, 128, 128); font-style: italic;"&gt;# forever&lt;/span&gt;&lt;/div&gt; &lt;/li&gt;&lt;li style="font-family: 'Courier New',Courier,monospace; font-weight: normal; font-style: normal; color: rgb(58, 106, 139);"&gt; &lt;div style="font-family: 'Courier New',Courier,monospace; font-weight: normal;"&gt;        &lt;span style="color: rgb(255, 119, 0); font-weight: bold;"&gt;try&lt;/span&gt;:&lt;/div&gt; &lt;/li&gt;&lt;li style="font-weight: bold; color: rgb(38, 83, 106);"&gt; &lt;div style="font-family: 'Courier New',Courier,monospace; font-weight: normal;"&gt;            &lt;span style="color: rgb(0, 128, 
0);"&gt;id&lt;/span&gt;, feed_url = jobs.&lt;span style="color: black;"&gt;get&lt;/span&gt;&lt;span style="color: black;"&gt;(&lt;/span&gt;&lt;span style="color: rgb(0, 128, 0);"&gt;False&lt;/span&gt;&lt;span style="color: black;"&gt;)&lt;/span&gt; &lt;span style="color: rgb(128, 128, 128); font-style: italic;"&gt;# arg1 means "Don't wait for items to appear"&lt;/span&gt;&lt;/div&gt; &lt;/li&gt;&lt;li style="font-family: 'Courier New',Courier,monospace; font-weight: normal; font-style: normal; color: rgb(58, 106, 139);"&gt; &lt;div style="font-family: 'Courier New',Courier,monospace; font-weight: normal;"&gt;        &lt;span style="color: rgb(255, 119, 0); font-weight: bold;"&gt;except&lt;/span&gt; &lt;span style="color: rgb(220, 20, 60);"&gt;Queue&lt;/span&gt;.&lt;span style="color: black;"&gt;Empty&lt;/span&gt;:&lt;/div&gt; &lt;/li&gt;&lt;li style="font-weight: bold; color: rgb(38, 83, 106);"&gt; &lt;div style="font-family: 'Courier New',Courier,monospace; font-weight: normal;"&gt;            &lt;span style="color: rgb(128, 128, 128); font-style: italic;"&gt;# Nothing left to do, time to die&lt;/span&gt;&lt;/div&gt; &lt;/li&gt;&lt;li style="font-family: 'Courier New',Courier,monospace; font-weight: normal; font-style: normal; color: rgb(58, 106, 139);"&gt; &lt;div style="font-family: 'Courier New',Courier,monospace; font-weight: normal;"&gt;            &lt;span style="color: rgb(255, 119, 0); font-weight: bold;"&gt;return&lt;/span&gt;&lt;/div&gt; &lt;/li&gt;&lt;li style="font-weight: bold; color: rgb(38, 83, 106);"&gt; &lt;div style="font-family: 'Courier New',Courier,monospace; font-weight: normal;"&gt;        rss = functions.&lt;span style="color: black;"&gt;get_feed_contents&lt;/span&gt;&lt;span style="color: black;"&gt;(&lt;/span&gt;feed_url&lt;span style="color: black;"&gt;)&lt;/span&gt;&lt;/div&gt; &lt;/li&gt;&lt;li style="font-family: 'Courier New',Courier,monospace; font-weight: normal; font-style: normal; color: rgb(58, 106, 139);"&gt; &lt;div 
style="font-family: 'Courier New',Courier,monospace; font-weight: normal;"&gt;        items = functions.&lt;span style="color: black;"&gt;parse_feed&lt;/span&gt;&lt;span style="color: black;"&gt;(&lt;/span&gt;rss&lt;span style="color: black;"&gt;)&lt;/span&gt;&lt;/div&gt; &lt;/li&gt;&lt;li style="font-weight: bold; color: rgb(38, 83, 106);"&gt; &lt;div style="font-family: 'Courier New',Courier,monospace; font-weight: normal;"&gt;        functions.&lt;span style="color: black;"&gt;store_feed_items&lt;/span&gt;&lt;span style="color: black;"&gt;(&lt;/span&gt;&lt;span style="color: rgb(0, 128, 0);"&gt;id&lt;/span&gt;, items&lt;span style="color: black;"&gt;)&lt;/span&gt;&lt;/div&gt; &lt;/li&gt;&lt;li style="font-family: 'Courier New',Courier,monospace; font-weight: normal; font-style: normal; color: rgb(58, 106, 139);"&gt; &lt;div style="font-family: 'Courier New',Courier,monospace; font-weight: normal;"&gt;    &lt;/div&gt; &lt;/li&gt;&lt;li style="font-weight: bold; color: rgb(38, 83, 106);"&gt; &lt;div style="font-family: 'Courier New',Courier,monospace; font-weight: normal;"&gt;&lt;span style="color: rgb(255, 119, 0); font-weight: bold;"&gt;for&lt;/span&gt; info &lt;span style="color: rgb(255, 119, 0); font-weight: bold;"&gt;in&lt;/span&gt; get_feed_list&lt;span style="color: black;"&gt;(&lt;/span&gt;&lt;span style="color: black;"&gt;)&lt;/span&gt;:&lt;/div&gt; &lt;/li&gt;&lt;li style="font-family: 'Courier New',Courier,monospace; font-weight: normal; font-style: normal; color: rgb(58, 106, 139);"&gt; &lt;div style="font-family: 'Courier New',Courier,monospace; font-weight: normal;"&gt;    &lt;span style="color: rgb(220, 20, 60);"&gt;Queue&lt;/span&gt;.&lt;span style="color: black;"&gt;put&lt;/span&gt;&lt;span style="color: black;"&gt;(&lt;/span&gt;info&lt;span style="color: black;"&gt;)&lt;/span&gt;&lt;/div&gt; &lt;/li&gt;&lt;li style="font-weight: bold; color: rgb(38, 83, 106);"&gt; &lt;div style="font-family: 'Courier New',Courier,monospace; font-weight: 
normal;"&gt; &lt;/div&gt; &lt;/li&gt;&lt;li style="font-family: 'Courier New',Courier,monospace; font-weight: normal; font-style: normal; color: rgb(58, 106, 139);"&gt; &lt;div style="font-family: 'Courier New',Courier,monospace; font-weight: normal;"&gt;&lt;span style="color: rgb(255, 119, 0); font-weight: bold;"&gt;for&lt;/span&gt; n &lt;span style="color: rgb(255, 119, 0); font-weight: bold;"&gt;in&lt;/span&gt; &lt;span style="color: rgb(0, 128, 0);"&gt;xrange&lt;/span&gt;&lt;span style="color: black;"&gt;(&lt;/span&gt;THREAD_LIMIT&lt;span style="color: black;"&gt;)&lt;/span&gt;:&lt;/div&gt; &lt;/li&gt;&lt;li style="font-weight: bold; color: rgb(38, 83, 106);"&gt; &lt;div style="font-family: 'Courier New',Courier,monospace; font-weight: normal;"&gt;    t = &lt;span style="color: rgb(220, 20, 60);"&gt;threading&lt;/span&gt;.&lt;span style="color: rgb(220, 20, 60);"&gt;Thread&lt;/span&gt;&lt;span style="color: black;"&gt;(&lt;/span&gt;target=&lt;span style="color: rgb(220, 20, 60);"&gt;thread&lt;/span&gt;&lt;span style="color: black;"&gt;)&lt;/span&gt;&lt;/div&gt; &lt;/li&gt;&lt;li style="font-family: 'Courier New',Courier,monospace; font-weight: normal; font-style: normal; color: rgb(58, 106, 139);"&gt; &lt;div style="font-family: 'Courier New',Courier,monospace; font-weight: normal;"&gt;    t.&lt;span style="color: black;"&gt;start&lt;/span&gt;&lt;span style="color: black;"&gt;(&lt;/span&gt;&lt;span style="color: black;"&gt;)&lt;/span&gt;&lt;/div&gt; &lt;/li&gt;&lt;li style="font-weight: bold; color: rgb(38, 83, 106);"&gt; &lt;div style="font-family: 'Courier New',Courier,monospace; font-weight: normal;"&gt;&lt;span style="color: rgb(255, 119, 0); font-weight: bold;"&gt;while&lt;/span&gt; &lt;span style="color: rgb(220, 20, 60);"&gt;threading&lt;/span&gt;.&lt;span style="color: black;"&gt;activeCount&lt;/span&gt;&lt;span style="color: black;"&gt;(&lt;/span&gt;&lt;span style="color: black;"&gt;)&lt;/span&gt;&gt; &lt;span style="color: rgb(128, 0, 
0);"&gt;1&lt;/span&gt;: &lt;span style="color: rgb(220, 20, 60);"&gt;time&lt;/span&gt;.&lt;span style="color: black;"&gt;sleep&lt;/span&gt;&lt;span style="color: black;"&gt;(&lt;/span&gt;&lt;span style="color: rgb(128, 0, 0);"&gt;1&lt;/span&gt;&lt;span style="color: black;"&gt;)&lt;/span&gt; &lt;span style="color: rgb(128, 128, 128); font-style: italic;"&gt;# Wait to finish &lt;/span&gt;&lt;/div&gt; &lt;/li&gt;&lt;/ol&gt; &lt;/div&gt; &lt;/div&gt; &lt;/div&gt; &lt;p&gt; Lines to note:&lt;/p&gt; &lt;ul&gt;&lt;li&gt;1: We create the job list. We use a &lt;a href="http://docs.python.org/lib/QueueObjects.html"&gt;Queue&lt;/a&gt; class from the standard library, as it's threadsafe. In fact, this is exactly what it was designed for.&lt;/li&gt;&lt;li&gt;7-11: Read from the queue. The &lt;code&gt;False&lt;/code&gt; parameter means that once the queue is empty, we're not interested any more. This raises the &lt;code&gt;Queue.Empty&lt;/code&gt; exception, and we terminate the thread.&lt;/li&gt;&lt;li&gt;16: Put lots of info into the queue&lt;/li&gt;&lt;/ul&gt; &lt;p&gt;Our threads run in a loop, performing work in lines 12-14, until there is no more work, then they exit. This model will work just fine for the majority of people, however, there are (still) problems. They are:&lt;/p&gt; &lt;ol&gt;&lt;li&gt;Opening 50 threads that write to a database will usually mean 50 database connections, or lots of locking. Either way, this is bad. Your data-getting threads don't want to be sat around waiting on a DB lock when they could be getting more information. Worse, you don't want to exceed your connection limit to the databse.&lt;/li&gt;&lt;li&gt;In this model, you have one master thread doing nothing and 50 threads doing I/O and work. This isn't a great idea in this application. 
Ideally you should only use threading (in Python) when you need to either: &lt;ol&gt;&lt;li&gt;Wait for I/O&lt;/li&gt;&lt;li&gt;Truly perform more than one concurrent task&lt;/li&gt;&lt;/ol&gt; &lt;/li&gt;&lt;/ol&gt; &lt;p&gt;Problem #2 isn't so serious, but it would be better to have more control over the heavy lifting. However problem #1 definitely needs addressing. The solution is to shift all processing inline to the master thread, which takes care of all the processing.&lt;/p&gt; &lt;h2&gt;Implementation 4: 1 worker, many runners&lt;/h2&gt; &lt;p&gt;Here's the code:&lt;/p&gt; &lt;div class="igBar"&gt;&lt;span id="lpython-6"&gt;&lt;a href="http://www.davidnaylor.co.uk/archives/2006/10/19/threaded-data-collection-with-python-including-examples/#" onclick="javascript:showPlainTxt('python-6'); return false;"&gt;PLAIN TEXT&lt;/a&gt;&lt;/span&gt;&lt;/div&gt; &lt;div class="syntax_hilite"&gt;&lt;span class="langName"&gt;PYTHON:&lt;/span&gt;&lt;br /&gt;&lt;div id="python-6"&gt; &lt;div class="python"&gt; &lt;ol&gt;&lt;li style="font-family: 'Courier New',Courier,monospace; font-weight: normal; font-style: normal; color: rgb(58, 106, 139);"&gt; &lt;div style="font-family: 'Courier New',Courier,monospace; font-weight: normal;"&gt;&lt;span style="color: rgb(255, 119, 0); font-weight: bold;"&gt;import&lt;/span&gt; &lt;span style="color: rgb(220, 20, 60);"&gt;threading&lt;/span&gt;, functions, &lt;span style="color: rgb(220, 20, 60);"&gt;time&lt;/span&gt;, &lt;span style="color: rgb(220, 20, 60);"&gt;Queue&lt;/span&gt;&lt;/div&gt; &lt;/li&gt;&lt;li style="font-weight: bold; color: rgb(38, 83, 106);"&gt; &lt;div style="font-family: 'Courier New',Courier,monospace; font-weight: normal;"&gt;THREAD_LIMIT = &lt;span style="color: rgb(128, 0, 0);"&gt;50&lt;/span&gt;&lt;/div&gt; &lt;/li&gt;&lt;li style="font-family: 'Courier New',Courier,monospace; font-weight: normal; font-style: normal; color: rgb(58, 106, 139);"&gt; &lt;div style="font-family: 'Courier New',Courier,monospace; 
font-weight: normal;"&gt;jobs = &lt;span style="color: rgb(220, 20, 60);"&gt;Queue&lt;/span&gt;.&lt;span style="color: rgb(220, 20, 60);"&gt;Queue&lt;/span&gt;&lt;span style="color: black;"&gt;(&lt;/span&gt;&lt;span style="color: rgb(128, 0, 0);"&gt;0&lt;/span&gt;&lt;span style="color: black;"&gt;)&lt;/span&gt; &lt;span style="color: rgb(128, 128, 128); font-style: italic;"&gt;# arg1 means "No item limit"&lt;/span&gt;&lt;/div&gt; &lt;/li&gt;&lt;li style="font-weight: bold; color: rgb(38, 83, 106);"&gt; &lt;div style="font-family: 'Courier New',Courier,monospace; font-weight: normal;"&gt;rss_to_process = &lt;span style="color: rgb(220, 20, 60);"&gt;Queue&lt;/span&gt;.&lt;span style="color: rgb(220, 20, 60);"&gt;Queue&lt;/span&gt;&lt;span style="color: black;"&gt;(&lt;/span&gt;THREAD_LIMIT&lt;span style="color: black;"&gt;)&lt;/span&gt; &lt;span style="color: rgb(128, 128, 128); font-style: italic;"&gt;# We set a limit on this, I'll&lt;/span&gt;&lt;/div&gt; &lt;/li&gt;&lt;li style="font-family: 'Courier New',Courier,monospace; font-weight: normal; font-style: normal; color: rgb(58, 106, 139);"&gt; &lt;div style="font-family: 'Courier New',Courier,monospace; font-weight: normal;"&gt;                                           &lt;span style="color: rgb(128, 128, 128); font-style: italic;"&gt;# explain later&lt;/span&gt;&lt;/div&gt; &lt;/li&gt;&lt;li style="font-weight: bold; color: rgb(38, 83, 106);"&gt; &lt;div style="font-family: 'Courier New',Courier,monospace; font-weight: normal;"&gt;&lt;span style="color: rgb(255, 119, 0); font-weight: bold;"&gt;def&lt;/span&gt; &lt;span style="color: rgb(220, 20, 60);"&gt;thread&lt;/span&gt;&lt;span style="color: black;"&gt;(&lt;/span&gt;&lt;span style="color: black;"&gt;)&lt;/span&gt;:&lt;/div&gt; &lt;/li&gt;&lt;li style="font-family: 'Courier New',Courier,monospace; font-weight: normal; font-style: normal; color: rgb(58, 106, 139);"&gt; &lt;div style="font-family: 'Courier New',Courier,monospace; font-weight: normal;"&gt;    
&lt;span style="color: rgb(255, 119, 0); font-weight: bold;"&gt;while&lt;/span&gt; &lt;span style="color: rgb(0, 128, 0);"&gt;True&lt;/span&gt;: &lt;span style="color: rgb(128, 128, 128); font-style: italic;"&gt;# forever&lt;/span&gt;&lt;/div&gt; &lt;/li&gt;&lt;li style="font-weight: bold; color: rgb(38, 83, 106);"&gt; &lt;div style="font-family: 'Courier New',Courier,monospace; font-weight: normal;"&gt;        &lt;span style="color: rgb(255, 119, 0); font-weight: bold;"&gt;try&lt;/span&gt;:&lt;/div&gt; &lt;/li&gt;&lt;li style="font-family: 'Courier New',Courier,monospace; font-weight: normal; font-style: normal; color: rgb(58, 106, 139);"&gt; &lt;div style="font-family: 'Courier New',Courier,monospace; font-weight: normal;"&gt;            &lt;span style="color: rgb(0, 128, 0);"&gt;id&lt;/span&gt;, feed_url = jobs.&lt;span style="color: black;"&gt;get&lt;/span&gt;&lt;span style="color: black;"&gt;(&lt;/span&gt;&lt;span style="color: rgb(0, 128, 0);"&gt;False&lt;/span&gt;&lt;span style="color: black;"&gt;)&lt;/span&gt; &lt;span style="color: rgb(128, 128, 128); font-style: italic;"&gt;# arg1 means "Don't wait for items&lt;/span&gt;&lt;/div&gt; &lt;/li&gt;&lt;li style="font-weight: bold; color: rgb(38, 83, 106);"&gt; &lt;div style="font-family: 'Courier New',Courier,monospace; font-weight: normal;"&gt;                                           &lt;span style="color: rgb(128, 128, 128); font-style: italic;"&gt;# to appear"&lt;/span&gt;&lt;/div&gt; &lt;/li&gt;&lt;li style="font-family: 'Courier New',Courier,monospace; font-weight: normal; font-style: normal; color: rgb(58, 106, 139);"&gt; &lt;div style="font-family: 'Courier New',Courier,monospace; font-weight: normal;"&gt;        &lt;span style="color: rgb(255, 119, 0); font-weight: bold;"&gt;except&lt;/span&gt; &lt;span style="color: rgb(220, 20, 60);"&gt;Queue&lt;/span&gt;.&lt;span style="color: black;"&gt;Empty&lt;/span&gt;:&lt;/div&gt; &lt;/li&gt;&lt;li style="font-weight: bold; color: rgb(38, 83, 106);"&gt; 
&lt;div style="font-family: 'Courier New',Courier,monospace; font-weight: normal;"&gt;            &lt;span style="color: rgb(128, 128, 128); font-style: italic;"&gt;# Nothing left to do, time to die&lt;/span&gt;&lt;/div&gt; &lt;/li&gt;&lt;li style="font-family: 'Courier New',Courier,monospace; font-weight: normal; font-style: normal; color: rgb(58, 106, 139);"&gt; &lt;div style="font-family: 'Courier New',Courier,monospace; font-weight: normal;"&gt;            &lt;span style="color: rgb(255, 119, 0); font-weight: bold;"&gt;return&lt;/span&gt;&lt;/div&gt; &lt;/li&gt;&lt;li style="font-weight: bold; color: rgb(38, 83, 106);"&gt; &lt;div style="font-family: 'Courier New',Courier,monospace; font-weight: normal;"&gt;        rss = functions.&lt;span style="color: black;"&gt;get_feed_contents&lt;/span&gt;&lt;span style="color: black;"&gt;(&lt;/span&gt;feed_url&lt;span style="color: black;"&gt;)&lt;/span&gt;&lt;/div&gt; &lt;/li&gt;&lt;li style="font-family: 'Courier New',Courier,monospace; font-weight: normal; font-style: normal; color: rgb(58, 106, 139);"&gt; &lt;div style="font-family: 'Courier New',Courier,monospace; font-weight: normal;"&gt;        rss_to_process.&lt;span style="color: black;"&gt;put&lt;/span&gt;&lt;span style="color: black;"&gt;(&lt;/span&gt;&lt;span style="color: black;"&gt;(&lt;/span&gt;&lt;span style="color: rgb(0, 128, 0);"&gt;id&lt;/span&gt;, rss&lt;span style="color: black;"&gt;)&lt;/span&gt;, &lt;span style="color: rgb(0, 128, 0);"&gt;True&lt;/span&gt;&lt;span style="color: black;"&gt;)&lt;/span&gt; &lt;span style="color: rgb(128, 128, 128); font-style: italic;"&gt;# This will block if our processing&lt;/span&gt;&lt;/div&gt; &lt;/li&gt;&lt;li style="font-weight: bold; color: rgb(38, 83, 106);"&gt; &lt;div style="font-family: 'Courier New',Courier,monospace; font-weight: normal;"&gt;                                            &lt;span style="color: rgb(128, 128, 128); font-style: italic;"&gt;# queue is too large&lt;/span&gt;&lt;/div&gt; 
&lt;/li&gt;&lt;li style="font-family: 'Courier New',Courier,monospace; font-weight: normal; font-style: normal; color: rgb(58, 106, 139);"&gt; &lt;div style="font-family: 'Courier New',Courier,monospace; font-weight: normal;"&gt;    &lt;/div&gt; &lt;/li&gt;&lt;li style="font-weight: bold; color: rgb(38, 83, 106);"&gt; &lt;div style="font-family: 'Courier New',Courier,monospace; font-weight: normal;"&gt;&lt;span style="color: rgb(255, 119, 0); font-weight: bold;"&gt;for&lt;/span&gt; info &lt;span style="color: rgb(255, 119, 0); font-weight: bold;"&gt;in&lt;/span&gt; get_feed_list&lt;span style="color: black;"&gt;(&lt;/span&gt;&lt;span style="color: black;"&gt;)&lt;/span&gt;: &lt;span style="color: rgb(128, 128, 128); font-style: italic;"&gt;# Load them up&lt;/span&gt;&lt;/div&gt; &lt;/li&gt;&lt;li style="font-family: 'Courier New',Courier,monospace; font-weight: normal; font-style: normal; color: rgb(58, 106, 139);"&gt; &lt;div style="font-family: 'Courier New',Courier,monospace; font-weight: normal;"&gt;    jobs.&lt;span style="color: black;"&gt;put&lt;/span&gt;&lt;span style="color: black;"&gt;(&lt;/span&gt;info&lt;span style="color: black;"&gt;)&lt;/span&gt;&lt;/div&gt; &lt;/li&gt;&lt;li style="font-weight: bold; color: rgb(38, 83, 106);"&gt; &lt;div style="font-family: 'Courier New',Courier,monospace; font-weight: normal;"&gt; &lt;/div&gt; &lt;/li&gt;&lt;li style="font-family: 'Courier New',Courier,monospace; font-weight: normal; font-style: normal; color: rgb(58, 106, 139);"&gt; &lt;div style="font-family: 'Courier New',Courier,monospace; font-weight: normal;"&gt;&lt;span style="color: rgb(255, 119, 0); font-weight: bold;"&gt;for&lt;/span&gt; n &lt;span style="color: rgb(255, 119, 0); font-weight: bold;"&gt;in&lt;/span&gt; &lt;span style="color: rgb(0, 128, 0);"&gt;xrange&lt;/span&gt;&lt;span style="color: black;"&gt;(&lt;/span&gt;THREAD_LIMIT&lt;span style="color: black;"&gt;)&lt;/span&gt;: &lt;span style="color: rgb(128, 128, 128); font-style: italic;"&gt;# 
Unleash the hounds&lt;/span&gt;&lt;/div&gt; &lt;/li&gt;&lt;li style="font-weight: bold; color: rgb(38, 83, 106);"&gt; &lt;div style="font-family: 'Courier New',Courier,monospace; font-weight: normal;"&gt;    t = &lt;span style="color: rgb(220, 20, 60);"&gt;threading&lt;/span&gt;.&lt;span style="color: rgb(220, 20, 60);"&gt;Thread&lt;/span&gt;&lt;span style="color: black;"&gt;(&lt;/span&gt;target=&lt;span style="color: rgb(220, 20, 60);"&gt;thread&lt;/span&gt;&lt;span style="color: black;"&gt;)&lt;/span&gt;&lt;/div&gt; &lt;/li&gt;&lt;li style="font-family: 'Courier New',Courier,monospace; font-weight: normal; font-style: normal; color: rgb(58, 106, 139);"&gt; &lt;div style="font-family: 'Courier New',Courier,monospace; font-weight: normal;"&gt;    t.&lt;span style="color: black;"&gt;start&lt;/span&gt;&lt;span style="color: black;"&gt;(&lt;/span&gt;&lt;span style="color: black;"&gt;)&lt;/span&gt;&lt;/div&gt; &lt;/li&gt;&lt;li style="font-weight: bold; color: rgb(38, 83, 106);"&gt; &lt;div style="font-family: 'Courier New',Courier,monospace; font-weight: normal;"&gt;&lt;span style="color: rgb(255, 119, 0); font-weight: bold;"&gt;while&lt;/span&gt; &lt;span style="color: rgb(220, 20, 60);"&gt;threading&lt;/span&gt;.&lt;span style="color: black;"&gt;activeCount&lt;/span&gt;&lt;span style="color: black;"&gt;(&lt;/span&gt;&lt;span style="color: black;"&gt;)&lt;/span&gt;&gt; &lt;span style="color: rgb(128, 0, 0);"&gt;1&lt;/span&gt; &lt;span style="color: rgb(255, 119, 0); font-weight: bold;"&gt;or&lt;/span&gt; &lt;span style="color: rgb(255, 119, 0); font-weight: bold;"&gt;not&lt;/span&gt; rss_to_process.&lt;span style="color: black;"&gt;empty&lt;/span&gt;&lt;span style="color: black;"&gt;(&lt;/span&gt;&lt;span style="color: black;"&gt;)&lt;/span&gt;:&lt;/div&gt; &lt;/li&gt;&lt;li style="font-family: 'Courier New',Courier,monospace; font-weight: normal; font-style: normal; color: rgb(58, 106, 139);"&gt; &lt;div style="font-family: 'Courier New',Courier,monospace; 
font-weight: normal;"&gt;    &lt;span style="color: rgb(128, 128, 128); font-style: italic;"&gt;# That condition means we want to do this loop if there are threads&lt;/span&gt;&lt;/div&gt; &lt;/li&gt;&lt;li style="font-weight: bold; color: rgb(38, 83, 106);"&gt; &lt;div style="font-family: 'Courier New',Courier,monospace; font-weight: normal;"&gt;    &lt;span style="color: rgb(128, 128, 128); font-style: italic;"&gt;# running OR there's stuff to process&lt;/span&gt;&lt;/div&gt; &lt;/li&gt;&lt;li style="font-family: 'Courier New',Courier,monospace; font-weight: normal; font-style: normal; color: rgb(58, 106, 139);"&gt; &lt;div style="font-family: 'Courier New',Courier,monospace; font-weight: normal;"&gt;    &lt;span style="color: rgb(255, 119, 0); font-weight: bold;"&gt;try&lt;/span&gt;:&lt;/div&gt; &lt;/li&gt;&lt;li style="font-weight: bold; color: rgb(38, 83, 106);"&gt; &lt;div style="font-family: 'Courier New',Courier,monospace; font-weight: normal;"&gt;        &lt;span style="color: rgb(0, 128, 0);"&gt;id&lt;/span&gt;, rss = rss_to_process.&lt;span style="color: black;"&gt;get&lt;/span&gt;&lt;span style="color: black;"&gt;(&lt;/span&gt;&lt;span style="color: rgb(0, 128, 0);"&gt;False&lt;/span&gt;, &lt;span style="color: rgb(128, 0, 0);"&gt;1&lt;/span&gt;&lt;span style="color: black;"&gt;)&lt;/span&gt; &lt;span style="color: rgb(128, 128, 128); font-style: italic;"&gt;# Wait for up to a second for a&lt;/span&gt;&lt;/div&gt; &lt;/li&gt;&lt;li style="font-family: 'Courier New',Courier,monospace; font-weight: normal; font-style: normal; color: rgb(58, 106, 139);"&gt; &lt;div style="font-family: 'Courier New',Courier,monospace; font-weight: normal;"&gt;                                               &lt;span style="color: rgb(128, 128, 128); font-style: italic;"&gt;# result&lt;/span&gt;&lt;/div&gt; &lt;/li&gt;&lt;li style="font-weight: bold; color: rgb(38, 83, 106);"&gt; &lt;div style="font-family: 'Courier New',Courier,monospace; font-weight: normal;"&gt;    &lt;span 
style="color: rgb(255, 119, 0); font-weight: bold;"&gt;except&lt;/span&gt; &lt;span style="color: rgb(220, 20, 60);"&gt;Queue&lt;/span&gt;.&lt;span style="color: black;"&gt;Empty&lt;/span&gt;:&lt;/div&gt; &lt;/li&gt;&lt;li style="font-family: 'Courier New',Courier,monospace; font-weight: normal; font-style: normal; color: rgb(58, 106, 139);"&gt; &lt;div style="font-family: 'Courier New',Courier,monospace; font-weight: normal;"&gt;        &lt;span style="color: rgb(255, 119, 0); font-weight: bold;"&gt;continue&lt;/span&gt;&lt;/div&gt; &lt;/li&gt;&lt;li style="font-weight: bold; color: rgb(38, 83, 106);"&gt; &lt;div style="font-family: 'Courier New',Courier,monospace; font-weight: normal;"&gt;    items = functions.&lt;span style="color: black;"&gt;parse_feed&lt;/span&gt;&lt;span style="color: black;"&gt;(&lt;/span&gt;rss&lt;span style="color: black;"&gt;)&lt;/span&gt;&lt;/div&gt; &lt;/li&gt;&lt;li style="font-family: 'Courier New',Courier,monospace; font-weight: normal; font-style: normal; color: rgb(58, 106, 139);"&gt; &lt;div style="font-family: 'Courier New',Courier,monospace; font-weight: normal;"&gt;    functions.&lt;span style="color: black;"&gt;store_feed_items&lt;/span&gt;&lt;span style="color: black;"&gt;(&lt;/span&gt;&lt;span style="color: rgb(0, 128, 0);"&gt;id&lt;/span&gt;, items&lt;span style="color: black;"&gt;)&lt;/span&gt; &lt;/div&gt; &lt;/li&gt;&lt;/ol&gt; &lt;/div&gt; &lt;/div&gt; &lt;/div&gt; &lt;p&gt; Notes:&lt;/p&gt; &lt;ul&gt;&lt;li&gt;4: Here we have a new Queue to return the RSS XML to the master thread. We set a limit on the size of the queue in case the threads return data faster than we can process it, meaning the queue will fill up with XML, burning memory. 
This sets a rudimentary rate limit on the threads, as they will block when the queue is full.&lt;/li&gt;&lt;li&gt;15: This is the line in the thread that blocks when the processing queue is full.&lt;/li&gt;&lt;li&gt;25-34: This is where all the heavy lifting takes place, all inline as well.&lt;/li&gt;&lt;/ul&gt; &lt;p&gt;...and there you have it, a fully fledged multithreaded data collector. Not bad for a few hours' work. It's not finished though, as there are plenty of things you'd want to add to it. For example:&lt;/p&gt; &lt;ul&gt;&lt;li&gt;More exception handling, especially KeyboardInterrupt and other signals. I would advise importing the &lt;code&gt;signal&lt;/code&gt; module, then wrapping our final loop in a big try/except block, catching KeyboardInterrupt. This would need to empty the job and processing queues, which will cause your threads to exit gracefully, and then your program will exit too.&lt;/li&gt;&lt;li&gt;Better support for multi-core machines. The Python &lt;a href="http://docs.python.org/api/threads.html"&gt;global interpreter lock&lt;/a&gt; prevents more than one thread from running at a time, however you can still spread your threads over multiple cores (I believe). What you might want to consider is throwing a few &lt;code&gt;os.fork()&lt;/code&gt; calls in, forcing your program to multiprocess, which could then take full advantage of multiple cores. You can do this by either dividing up your work queue at the start, or by moving the heavy lifting out of the main thread and into seperate processes. Your main thread could then communicate with these "worker" processes via shared memory or sockets (my preference) then pass back the results. 
Make sure your &lt;code&gt;functions.parse_feed&lt;/code&gt; can produce picklable objects.&lt;/li&gt;&lt;li&gt;You could use GUI programming or ncurses to provide a progress bar, by inspecting the value of &lt;code&gt;jobs.qsize()&lt;/code&gt;&lt;/li&gt;&lt;/ul&gt;&lt;div class="blogger-post-footer"&gt;&lt;img width='1' height='1' src='https://blogger.googleusercontent.com/tracker/33306823-7522740066314437535?l=pylab.blogspot.com' alt='' /&gt;&lt;/div&gt;</content><link rel='replies' type='application/atom+xml' href='http://pylab.blogspot.com/feeds/7522740066314437535/comments/default' title='Post Comments'/><link rel='replies' type='text/html' href='http://www.blogger.com/comment.g?blogID=33306823&amp;postID=7522740066314437535' title='0 Comments'/><link rel='edit' type='application/atom+xml' href='http://www.blogger.com/feeds/33306823/posts/default/7522740066314437535'/><link rel='self' type='application/atom+xml' href='http://www.blogger.com/feeds/33306823/posts/default/7522740066314437535'/><link rel='alternate' type='text/html' href='http://pylab.blogspot.com/2007/01/threaded-data-collection-with-python.html' title='Threaded data collection with Python'/><author><name>NextGenSearch</name><email>noreply@blogger.com</email><gd:image rel='http://schemas.google.com/g/2005#thumbnail' width='16' height='16' src='http://img2.blogblog.com/img/b16-rounded.gif'/></author><thr:total>0</thr:total></entry><entry><id>tag:blogger.com,1999:blog-33306823.post-6956502379088069446</id><published>2007-01-05T12:41:00.000-08:00</published><updated>2007-01-05T12:42:27.147-08:00</updated><category scheme='http://www.blogger.com/atom/ns#' term='JSON'/><title type='text'>How to ensure_ascii=False when using JSON dumps()</title><content type='html'>&lt;span style="font-size:100%;"&gt;simplejson is very well written, but there's a catch. If you're doing UTF-8, make sure you ensure_ascii=False whenever calling dumps(). 
I don't know what it is with it that it will give you funky&lt;span style="font-weight: bold;"&gt; \uSOMETHING&lt;/span&gt; sequences for every non-ascii byte, and although that may display&lt;br /&gt;right in HTML/JS, you should want to keep raw UTF-8 for better interoperability (specially with other JSON parsers). &lt;/span&gt;&lt;div class="blogger-post-footer"&gt;&lt;img width='1' height='1' src='https://blogger.googleusercontent.com/tracker/33306823-6956502379088069446?l=pylab.blogspot.com' alt='' /&gt;&lt;/div&gt;</content><link rel='replies' type='application/atom+xml' href='http://pylab.blogspot.com/feeds/6956502379088069446/comments/default' title='Post Comments'/><link rel='replies' type='text/html' href='http://www.blogger.com/comment.g?blogID=33306823&amp;postID=6956502379088069446' title='0 Comments'/><link rel='edit' type='application/atom+xml' href='http://www.blogger.com/feeds/33306823/posts/default/6956502379088069446'/><link rel='self' type='application/atom+xml' href='http://www.blogger.com/feeds/33306823/posts/default/6956502379088069446'/><link rel='alternate' type='text/html' href='http://pylab.blogspot.com/2007/01/how-to-ensureasciifalse-when-using-json.html' title='How to ensure_ascii=False when using JSON dumps()'/><author><name>NextGenSearch</name><email>noreply@blogger.com</email><gd:image rel='http://schemas.google.com/g/2005#thumbnail' width='16' height='16' src='http://img2.blogblog.com/img/b16-rounded.gif'/></author><thr:total>0</thr:total></entry><entry><id>tag:blogger.com,1999:blog-33306823.post-8489849311347853291</id><published>2007-01-04T19:03:00.000-08:00</published><updated>2007-01-04T19:13:44.535-08:00</updated><category scheme='http://www.blogger.com/atom/ns#' term='postgres'/><title type='text'>Fix autonumbering sequences when backing a single table in postgres</title><content type='html'>yea, the sequences are never really captured on a database dump of just one table, need to do an entire backup.&lt;br /&gt;in general, 
when i want to move a 1 table, i use the \d table_name;&lt;br /&gt;then i write / copy the structure,&lt;br /&gt;then create it with another psql to other db.&lt;br /&gt;&lt;br /&gt;in general, when i want to move a 1 table, i use the \d table_name;&lt;br /&gt;then i write / copy the structure,&lt;br /&gt;then create it with another psql to other db.&lt;br /&gt;&lt;br /&gt;So to fix id problems when backing a table :&lt;br /&gt;select max (id) from table;&lt;br /&gt;&lt;br /&gt;alter sequence my_sequence restart &lt;span style="font-weight: bold;"&gt;maxid+1&lt;/span&gt;;&lt;br /&gt;&lt;br /&gt;&lt;br /&gt;the thing about a table is in the general can have (constraints, such as primary keys, foreign keys, that reference other tables), indexes(for finding data faster), triggers (calls a stored procedure), and rules (that rewrite operations based on data, like triggers)&lt;br /&gt;but most people just have tables, maybe with a primary key default nextval('a_sequnce'), so it is a lot simpler&lt;br /&gt;so in psql&lt;br /&gt;\d tablename&lt;br /&gt;dumps the structure of the table to the text console&lt;br /&gt;&lt;span style="font-weight: bold;"&gt;---------&lt;/span&gt;&lt;br /&gt;well like here&lt;br /&gt;\d task&lt;br /&gt;                                  Table "task.task"&lt;br /&gt;    Column    |      Type        |                    Modifiers&lt;br /&gt;----------------+-------------------+---------------------------------------------------&lt;br /&gt; id            | bigint            | not null default nextval('task_id_seq'::regclass)&lt;br /&gt; entry_date    | date              |&lt;br /&gt; completed_date | date              |&lt;br /&gt; project_id    | bigint            |&lt;br /&gt; name          | character varying |&lt;br /&gt; description    | text              |&lt;br /&gt; parent_id      | bigint            |&lt;br /&gt;Indexes:&lt;br /&gt;    "task_pk" PRIMARY KEY, btree (id)&lt;br /&gt;Foreign-key constraints:&lt;br /&gt;    "_parent_id" FOREIGN 
KEY (parent_id) REFERENCES task(id)&lt;br /&gt;    "_project_id" FOREIGN KEY (project_id) REFERENCES project(id)&lt;br /&gt;this shows me the table "task" contains id, entry_date, completed_date,,...&lt;br /&gt;and their types, an then i see the constriants, like what foreign key it references, so i know that this table depends on another table.&lt;br /&gt;i then have a couple minutes in a text editor and make this into&lt;br /&gt;CREATE TABLE TASK (ID BIGINT NOT NULL DEFAULT NEXTVAL('task_id_seq'),&lt;br /&gt;  entry_date date,&lt;br /&gt;etc.&lt;br /&gt;then use that to create teh table in new db.&lt;br /&gt;i guess it is hte manual old-school way to d it.&lt;br /&gt;then when the new table exists,&lt;br /&gt;(in both new, and old databases,)&lt;br /&gt;i can type&lt;br /&gt;psql -h host1 -U user1 -c "copy task to stdout csv" | psql -h host2 -U user2 -c "copy task from stdin csv"&lt;br /&gt;or i guess you can do a copy out to a file first, and then copy into the second db by cat the file.&lt;br /&gt;like how you would install that country data, i emailed to you.&lt;br /&gt;for more info on the copy command, "\h copy" from the psql prompt&lt;br /&gt;lol, see, thats why i never use a "GUI", becuse the command line is always better than any GUI.&lt;br /&gt;so then after i get the table created, i do the create sequence task_id_seq start nnnnn;&lt;br /&gt;actually, if you ran the sql propertly, it doesnt let you create the table without having the sequence ther..&lt;br /&gt;so i would then have the sequence manually created, and to start at next higher value.&lt;div class="blogger-post-footer"&gt;&lt;img width='1' height='1' src='https://blogger.googleusercontent.com/tracker/33306823-8489849311347853291?l=pylab.blogspot.com' alt='' /&gt;&lt;/div&gt;</content><link rel='replies' type='application/atom+xml' href='http://pylab.blogspot.com/feeds/8489849311347853291/comments/default' title='Post Comments'/><link rel='replies' type='text/html' 
href='http://www.blogger.com/comment.g?blogID=33306823&amp;postID=8489849311347853291' title='0 Comments'/><link rel='edit' type='application/atom+xml' href='http://www.blogger.com/feeds/33306823/posts/default/8489849311347853291'/><link rel='self' type='application/atom+xml' href='http://www.blogger.com/feeds/33306823/posts/default/8489849311347853291'/><link rel='alternate' type='text/html' href='http://pylab.blogspot.com/2007/01/fix-autonumbering-sequences-when.html' title='Fix autonumbering sequences when backing a single table in postgres'/><author><name>NextGenSearch</name><email>noreply@blogger.com</email><gd:image rel='http://schemas.google.com/g/2005#thumbnail' width='16' height='16' src='http://img2.blogblog.com/img/b16-rounded.gif'/></author><thr:total>0</thr:total></entry><entry><id>tag:blogger.com,1999:blog-33306823.post-4858468531660430180</id><published>2006-12-30T23:30:00.001-08:00</published><updated>2006-12-30T23:40:59.981-08:00</updated><category scheme='http://www.blogger.com/atom/ns#' term='postgres'/><title type='text'>DISTINT 300ms grouby 1300ms for 10 entries</title><content type='html'>recentlycommentedposts = web.query('SELECT DISTINCT ON(comments.postid) comments.postid, comments.comment, posts.title, posts.id FROM comments JOIN posts on posts.id=comments.postid order by comments.postid DESC limit 10')&lt;br /&gt;      &lt;span style="font-weight: bold;"&gt;recentlycommentedposts = web.query('''select postid, max(created) as created &lt;/span&gt;&lt;br /&gt;&lt;span style="font-weight: bold;"&gt;        from comments &lt;/span&gt;&lt;br /&gt;&lt;span style="font-weight: bold;"&gt;        group by postid &lt;/span&gt;&lt;br /&gt;&lt;span style="font-weight: bold;"&gt;        order by created desc &lt;/span&gt;&lt;br /&gt;&lt;span style="font-weight: bold;"&gt;        limit 10;''')&lt;br /&gt;&lt;br /&gt;&lt;span style="color: rgb(102, 0, 0);"&gt;select  p.id, p.title, p.created as post_date, c.comment, a.created as comment_date 
&lt;/span&gt;&lt;br /&gt;&lt;span style="color: rgb(102, 0, 0);"&gt;from posts p, &lt;/span&gt;&lt;br /&gt;&lt;span style="color: rgb(102, 0, 0);"&gt;    comments c, &lt;/span&gt;&lt;br /&gt;&lt;span style="color: rgb(102, 0, 0);"&gt;    (select postid, max(created) as created &lt;/span&gt;&lt;br /&gt;&lt;span style="color: rgb(102, 0, 0);"&gt;    from comments &lt;/span&gt;&lt;br /&gt;&lt;span style="color: rgb(102, 0, 0);"&gt;    group by postid &lt;/span&gt;&lt;br /&gt;&lt;span style="color: rgb(102, 0, 0);"&gt;    order by created desc &lt;/span&gt;&lt;br /&gt;&lt;span style="color: rgb(102, 0, 0);"&gt;    limit 10) a &lt;/span&gt;&lt;br /&gt;&lt;span style="color: rgb(102, 0, 0);"&gt;where &lt;/span&gt;&lt;br /&gt;&lt;span style="color: rgb(102, 0, 0);"&gt;  p.id = a.postid &lt;/span&gt;&lt;br /&gt;&lt;span style="color: rgb(102, 0, 0);"&gt;  and c.postid = a.postid &lt;/span&gt;&lt;br /&gt;&lt;span style="color: rgb(102, 0, 0);"&gt;  and c.created = a.created &lt;/span&gt;&lt;br /&gt;&lt;span style="color: rgb(102, 0, 0);"&gt;order by post_date desc;&lt;/span&gt;&lt;br /&gt;&lt;/span&gt;&lt;div class="blogger-post-footer"&gt;&lt;img width='1' height='1' src='https://blogger.googleusercontent.com/tracker/33306823-4858468531660430180?l=pylab.blogspot.com' alt='' /&gt;&lt;/div&gt;</content><link rel='replies' type='application/atom+xml' href='http://pylab.blogspot.com/feeds/4858468531660430180/comments/default' title='Post Comments'/><link rel='replies' type='text/html' href='http://www.blogger.com/comment.g?blogID=33306823&amp;postID=4858468531660430180' title='0 Comments'/><link rel='edit' type='application/atom+xml' href='http://www.blogger.com/feeds/33306823/posts/default/4858468531660430180'/><link rel='self' type='application/atom+xml' href='http://www.blogger.com/feeds/33306823/posts/default/4858468531660430180'/><link rel='alternate' type='text/html' href='http://pylab.blogspot.com/2006/12/1000-times-better-than-distinct-on.html' title='DISTINT 300ms 
grouby 1300ms for 10 entries'/><author><name>NextGenSearch</name><email>noreply@blogger.com</email><gd:image rel='http://schemas.google.com/g/2005#thumbnail' width='16' height='16' src='http://img2.blogblog.com/img/b16-rounded.gif'/></author><thr:total>0</thr:total></entry><entry><id>tag:blogger.com,1999:blog-33306823.post-7548884492244292697</id><published>2006-12-30T17:40:00.001-08:00</published><updated>2006-12-30T17:40:50.778-08:00</updated><category scheme='http://www.blogger.com/atom/ns#' term='apache'/><title type='text'>"htpasswd" and /etc/httpd/conf/httpd.conf</title><content type='html'>there is probably a &lt;directory&gt;&lt;br /&gt;so anywhere after athat &lt;/directory&gt;&lt;br /&gt;after that change, we need to make the /www/svn_new/main/auth.conf file&lt;br /&gt;the "htpasswd" commmand does this&lt;br /&gt;from shell,&lt;br /&gt;cd /www/svn_new/main/&lt;br /&gt;htpasswd -c auth.conf travis&lt;br /&gt;(-c creates if not exists)&lt;br /&gt;then to add future users, for example,&lt;br /&gt;htpasswd auth.conf travis&lt;br /&gt;5:27:38 pm&lt;br /&gt;mark &lt;br /&gt;htpasswd -c /etc/httpd/repo_passwd user&lt;div class="blogger-post-footer"&gt;&lt;img width='1' height='1' src='https://blogger.googleusercontent.com/tracker/33306823-7548884492244292697?l=pylab.blogspot.com' alt='' /&gt;&lt;/div&gt;</content><link rel='replies' type='application/atom+xml' href='http://pylab.blogspot.com/feeds/7548884492244292697/comments/default' title='Post Comments'/><link rel='replies' type='text/html' href='http://www.blogger.com/comment.g?blogID=33306823&amp;postID=7548884492244292697' title='0 Comments'/><link rel='edit' type='application/atom+xml' href='http://www.blogger.com/feeds/33306823/posts/default/7548884492244292697'/><link rel='self' type='application/atom+xml' href='http://www.blogger.com/feeds/33306823/posts/default/7548884492244292697'/><link rel='alternate' type='text/html' href='http://pylab.blogspot.com/2006/12/htpasswd-and-etchttpdconfhttpdconf.html' 
title='&quot;htpasswd&quot; and /etc/httpd/conf/httpd.conf'/><author><name>NextGenSearch</name><email>noreply@blogger.com</email><gd:image rel='http://schemas.google.com/g/2005#thumbnail' width='16' height='16' src='http://img2.blogblog.com/img/b16-rounded.gif'/></author><thr:total>0</thr:total></entry><entry><id>tag:blogger.com,1999:blog-33306823.post-8648705117907602877</id><published>2006-12-30T14:14:00.001-08:00</published><updated>2006-12-30T14:14:39.560-08:00</updated><category scheme='http://www.blogger.com/atom/ns#' term='postgres'/><title type='text'>Postgresql VIEWs will save your day</title><content type='html'>oh yea, thats right mysql sucks ass for the complex queries, like even &lt;br /&gt;select * from users where company_id in (select id from company where company_name='foo') ;&lt;br /&gt;is that a join&lt;br /&gt;Travis :&lt;br /&gt;yea, it could be done with a join too&lt;br /&gt;the above was running the output of one select into the input of a second.&lt;br /&gt;  :&lt;br /&gt;i m doing a double join&lt;br /&gt;i think it is super slow&lt;br /&gt;2:04:33 pm&lt;br /&gt;Travis &lt;br /&gt;a join would be &lt;br /&gt;select u.* &lt;br /&gt;from users u, company c&lt;br /&gt;where u.company_id = c.id&lt;br /&gt;and c.company_name='foo'&lt;br /&gt;i found with postgresql, if i was always running this sort of query&lt;br /&gt;then i create a view&lt;br /&gt;===================================&lt;br /&gt;create or replace view v_company_users as&lt;br /&gt;select u.* , c.company_name&lt;br /&gt;from users u, company c&lt;br /&gt;where u.company_id = c.id;&lt;br /&gt;2:05:34 pm&lt;br /&gt;mark &lt;br /&gt;wat os a voew&lt;br /&gt;2:05:40 pm&lt;br /&gt;Travis &lt;br /&gt;then my app does &lt;br /&gt;select * from v_company_users where company_name='foo'&lt;br /&gt;so, instead of doing a join query and specifying parameters, create a view, and the view barfs out the values and you query the where on the view.&lt;div class="blogger-post-footer"&gt;&lt;img 
width='1' height='1' src='https://blogger.googleusercontent.com/tracker/33306823-8648705117907602877?l=pylab.blogspot.com' alt='' /&gt;&lt;/div&gt;</content><link rel='replies' type='application/atom+xml' href='http://pylab.blogspot.com/feeds/8648705117907602877/comments/default' title='Post Comments'/><link rel='replies' type='text/html' href='http://www.blogger.com/comment.g?blogID=33306823&amp;postID=8648705117907602877' title='0 Comments'/><link rel='edit' type='application/atom+xml' href='http://www.blogger.com/feeds/33306823/posts/default/8648705117907602877'/><link rel='self' type='application/atom+xml' href='http://www.blogger.com/feeds/33306823/posts/default/8648705117907602877'/><link rel='alternate' type='text/html' href='http://pylab.blogspot.com/2006/12/postgresql-views-will-save-your-day.html' title='Postgresql VIEWs will save your day'/><author><name>NextGenSearch</name><email>noreply@blogger.com</email><gd:image rel='http://schemas.google.com/g/2005#thumbnail' width='16' height='16' src='http://img2.blogblog.com/img/b16-rounded.gif'/></author><thr:total>0</thr:total></entry><entry><id>tag:blogger.com,1999:blog-33306823.post-732454083100159275</id><published>2006-12-30T05:44:00.000-08:00</published><updated>2006-12-30T05:45:04.291-08:00</updated><category scheme='http://www.blogger.com/atom/ns#' term='postgres'/><title type='text'>Rebuilding large GIN indexes</title><content type='html'>&lt;h4 class="serendipity_title"&gt;&lt;a href="http://people.planetpostgresql.org/mha/index.php?/archives/120-Rebuilding-large-GIN-indexes.html"&gt;Rebuilding large GIN indexes&lt;/a&gt;&lt;/h4&gt;                                   &lt;span class="serendipity_entryIcon"&gt;                                                 &lt;a href="http://people.planetpostgresql.org/mha/index.php?/categories/2-PostgreSQL"&gt;&lt;img class="serendipity_entryIcon" title="PostgreSQL" alt="PostgreSQL" src="http://people.planetpostgresql.org/mha/uploads/icons/pgsql.gif" /&gt;&lt;/a&gt;   
                                      &lt;/span&gt;                          &lt;div class="serendipity_entry_body"&gt;                 takes time. &lt;i&gt;A lot&lt;/i&gt; of time. With rebuilding, I mean when you for example have to UPDATE all your tsvector fields due to a change in tsearch2 configuration. (Yes, I had to do that because I had a slightly incorrect tsearch2 configuration for the archives search database). So don't do it. Instead use the fact that PostgreSQL has nice transactional DDL and do something like this:&lt;br /&gt;&lt;code style="white-space: pre;"&gt;&lt;br /&gt;&lt;br /&gt;BEGIN TRANSACTION;&lt;br /&gt;&lt;br /&gt;CREATE TABLE messages_new AS SELECT id,txt,to_tsvector(txt) AS fti&lt;br /&gt;&lt;br /&gt; FROM messages;&lt;br /&gt;&lt;br /&gt;CREATE INDEX messages_new_fti ON messages_new USING gin(fti);&lt;br /&gt;&lt;br /&gt;ANALYZE messages_new;&lt;br /&gt;&lt;br /&gt;ALTER TABLE messages RENAME TO messages_old;&lt;br /&gt;&lt;br /&gt;ALTER TABLE messages_new RENAME TO messages;&lt;br /&gt;&lt;br /&gt;COMMIT;&lt;br /&gt;&lt;br /&gt;DROP TABLE messages_old;&lt;br /&gt;&lt;br /&gt;ALTER INDEX messages_new_fti RENAME TO messages_fti;&lt;br /&gt;&lt;br /&gt;&lt;/code&gt;&lt;br /&gt;(apologies for any typos, I didn't bother to actually type these commands into the database again, and I lost my cut-and-paste of what I ran)&lt;br /&gt;&lt;br /&gt;This way, the messages table can still serve up searches without any disruption to the searches at all. And creating the new index is a &lt;i&gt;lot&lt;/i&gt; faster than updating the existing one if you have to touch all rows.             
&lt;/div&gt;&lt;div class="blogger-post-footer"&gt;&lt;img width='1' height='1' src='https://blogger.googleusercontent.com/tracker/33306823-732454083100159275?l=pylab.blogspot.com' alt='' /&gt;&lt;/div&gt;</content><link rel='replies' type='application/atom+xml' href='http://pylab.blogspot.com/feeds/732454083100159275/comments/default' title='Post Comments'/><link rel='replies' type='text/html' href='http://www.blogger.com/comment.g?blogID=33306823&amp;postID=732454083100159275' title='0 Comments'/><link rel='edit' type='application/atom+xml' href='http://www.blogger.com/feeds/33306823/posts/default/732454083100159275'/><link rel='self' type='application/atom+xml' href='http://www.blogger.com/feeds/33306823/posts/default/732454083100159275'/><link rel='alternate' type='text/html' href='http://pylab.blogspot.com/2006/12/rebuilding-large-gin-indexes.html' title='Rebuilding large GIN indexes'/><author><name>NextGenSearch</name><email>noreply@blogger.com</email><gd:image rel='http://schemas.google.com/g/2005#thumbnail' width='16' height='16' src='http://img2.blogblog.com/img/b16-rounded.gif'/></author><thr:total>0</thr:total></entry><entry><id>tag:blogger.com,1999:blog-33306823.post-3322493445236714495</id><published>2006-12-30T05:00:00.001-08:00</published><updated>2006-12-30T05:00:34.174-08:00</updated><category scheme='http://www.blogger.com/atom/ns#' term='postgres'/><title type='text'>Getting random rows from a database table</title><content type='html'>&lt;h4 class="serendipity_title"&gt;&lt;a href="http://people.planetpostgresql.org/greg/index.php?/archives/40-Getting-random-rows-from-a-database-table.html"&gt;Getting random rows from a database table&lt;/a&gt;&lt;/h4&gt;                                   &lt;span class="serendipity_entryIcon"&gt;                                                                                                             &lt;/span&gt;                          &lt;div class="serendipity_entry_body"&gt;                 
&lt;p&gt;Selecting random rows from a table in your database is generally useful for two things: grabbing one or more rows to display and/or use somehow, and for selecting a random subset of your rows and performing some sort of statistical analysis on the data. While the standard way of using &lt;span style="color: rgb(68, 0, 0); font-weight: bold;"&gt;ORDER BY RANDOM()&lt;/span&gt; is occasionally useful, it is very slow, it is non-repeatable, and it does not scale well. I'll demonstrate some better methods to get random rows.&lt;/p&gt;              &lt;/div&gt;                          &lt;p&gt;For this article, I'll be using a table named &lt;span style="font-weight: bold; color: rgb(0, 102, 0); font-size: 105%;"&gt;mydata&lt;/span&gt; which contains ten million rows of data, and a primary key named &lt;span style="font-weight: bold; color: rgb(0, 102, 0);"&gt;id&lt;/span&gt;, which is of type &lt;a href="http://people.planetpostgresql.org/greg/exit.php?url_id=165&amp;entry_id=40" title="http://www.postgresql.org/docs/8.1/interactive/datatype.html#DATATYPE-NUMERIC" onmouseover="window.status='http://www.postgresql.org/docs/8.1/interactive/datatype.html#DATATYPE-NUMERIC';return true;" onmouseout="window.status='';return true;"&gt;bigint&lt;/a&gt;.&lt;/p&gt;  &lt;p&gt;First, it's important to distinguish between random and unordered. If you simply pull rows from your table without an ORDER BY clause, they may &lt;span style="font-style: italic;"&gt;appear&lt;/span&gt; random, but they are not: they are simply in an undefined order. In PostgreSQL, they will be roughly in an order related to the last time they were updated or inserted. However, by "random" we really mean that any row in the table has as much chance of appearing as another row within our SELECT statement. We'll need some way to accomplish this in SQL.&lt;/p&gt;  &lt;hr /&gt;&lt;h3&gt;ORDER BY RANDOM()&lt;/h3&gt; &lt;p&gt;How do we get something "random" into our query? 
Every modern computer has some way of generating a random number, and PostgreSQL has a way as well: the built in &lt;a href="http://people.planetpostgresql.org/greg/exit.php?url_id=166&amp;entry_id=40" title="http://www.postgresql.org/docs/current/interactive/functions-math.html" onmouseover="window.status='http://www.postgresql.org/docs/current/interactive/functions-math.html';return true;" onmouseout="window.status='';return true;"&gt;RANDOM() function&lt;/a&gt;, which generates a &lt;a href="http://people.planetpostgresql.org/greg/exit.php?url_id=167&amp;entry_id=40" title="http://www.postgresql.org/docs/current/interactive/datatype.html#DATATYPE-NUMERIC" onmouseover="window.status='http://www.postgresql.org/docs/current/interactive/datatype.html#DATATYPE-NUMERIC';return true;" onmouseout="window.status='';return true;"&gt;double precision number&lt;/a&gt; from 0.0 to 1.0:&lt;/p&gt;  &lt;div style="border: 1px solid rgb(136, 136, 136); padding: 1em; background: rgb(238, 238, 238) none repeat scroll 0% 50%; font-family: Courier,monospace; margin-right: 5em; margin-left: 1em; color: black; -moz-background-clip: -moz-initial; -moz-background-origin: -moz-initial; -moz-background-inline-policy: -moz-initial; font-weight: bold; font-size: 110%; line-height: 130%; white-space: pre;"&gt;SELECT RANDOM() FROM generate_series(1,5);&lt;/div&gt; &lt;p&gt;Running the above yields something like this:&lt;/p&gt;  &lt;div style="border: 1px solid rgb(136, 136, 136); padding: 1em; background: rgb(221, 238, 238) none repeat scroll 0% 50%; font-family: Courier,monospace; margin-right: 5em; margin-left: 1em; color: black; -moz-background-clip: -moz-initial; -moz-background-origin: -moz-initial; -moz-background-inline-policy: -moz-initial; font-weight: bold; font-size: 110%; line-height: 130%; white-space: pre;"&gt;        random&lt;br /&gt;-------------------&lt;br /&gt; 0.739388620946556&lt;br /&gt; 0.706028273329139&lt;br /&gt; 0.150622034911066&lt;br /&gt; 
0.396196706686169&lt;br /&gt; 0.412912936415523&lt;br /&gt;&lt;/div&gt; &lt;p&gt;(Note the use of the nifty new &lt;span style="font-weight: bold; color: rgb(0, 102, 0); font-size: 105%;"&gt;&lt;a href="http://people.planetpostgresql.org/greg/exit.php?url_id=168&amp;entry_id=40" title="http://www.postgresql.org/docs/current/interactive/functions-srf.html" onmouseover="window.status='http://www.postgresql.org/docs/current/interactive/functions-srf.html ';return true;" onmouseout="window.status='';return true;"&gt;generate_series()&lt;/a&gt;&lt;/span&gt; function to repeat a SQL command a certain number of times).&lt;/p&gt;  &lt;p&gt;PostgreSQL also allows you to use RANDOM() in the ORDER BY clause, which is one way to get a random row from the database. Let's pull out three random values from our test table:&lt;/p&gt;  &lt;div style="border: 1px solid rgb(136, 136, 136); padding: 1em; background: rgb(238, 238, 238) none repeat scroll 0% 50%; font-family: Courier,monospace; margin-right: 5em; margin-left: 1em; color: black; -moz-background-clip: -moz-initial; -moz-background-origin: -moz-initial; -moz-background-inline-policy: -moz-initial; font-weight: bold; font-size: 110%; line-height: 130%; white-space: pre;"&gt;SELECT id FROM mydata ORDER BY RANDOM() LIMIT 3;&lt;/div&gt;&lt;br /&gt;&lt;div style="border: 1px solid rgb(136, 136, 136); padding: 1em; background: rgb(221, 238, 238) none repeat scroll 0% 50%; font-family: Courier,monospace; margin-right: 5em; margin-left: 1em; color: black; -moz-background-clip: -moz-initial; -moz-background-origin: -moz-initial; -moz-background-inline-policy: -moz-initial; font-weight: bold; font-size: 110%; line-height: 130%; white-space: pre;"&gt;    id&lt;br /&gt;---------&lt;br /&gt; 3635389&lt;br /&gt; 9417084&lt;br /&gt; 8385175&lt;br /&gt;&lt;/div&gt;  &lt;p&gt;This appears to work just fine, but it has a major drawback - it does not scale, and gets extremely slow as the table size increases. 
This is a consequence of how ORDER BY RANDOM() works - it basically assigns a random number to &lt;span style="font-weight: bold; font-style: italic;"&gt;every row&lt;/span&gt; in the database, then orders the entire table by the random numbers, and then returns the rows you want. For small tables, this is not much of a problem, but this is a terrible solution as the tables grow in size. Here's a breakdown on speeds on my system for grabbing a single row by using the query &lt;span style="background: rgb(238, 255, 255) none repeat scroll 0% 50%; -moz-background-clip: -moz-initial; -moz-background-origin: -moz-initial; -moz-background-inline-policy: -moz-initial; font-weight: bold;"&gt;SELECT id FROM mydata ORDER BY RANDOM() LIMIT 1&lt;/span&gt;:&lt;/p&gt;  &lt;table style="background: rgb(255, 255, 238) none repeat scroll 0% 50%; -moz-background-clip: -moz-initial; -moz-background-origin: -moz-initial; -moz-background-inline-policy: -moz-initial; margin-left: 1em;" border="1" cellpadding="2" cellspacing="2"&gt; &lt;tbody&gt;&lt;tr&gt;&lt;th&gt;Number of rows&lt;/th&gt;&lt;th&gt;Time to run&lt;/th&gt;&lt;/tr&gt;&lt;tr&gt; &lt;td&gt;One thousand (1000)&lt;/td&gt;&lt;td&gt;3 milliseconds&lt;/td&gt;&lt;/tr&gt; &lt;tr&gt;&lt;td&gt;Ten thousand (10,000)&lt;/td&gt;&lt;td&gt;40 milliseconds&lt;/td&gt;&lt;/tr&gt; &lt;tr&gt;&lt;td&gt;One hundred thousand (100,000)&lt;/td&gt;&lt;td&gt;Half a second&lt;/td&gt;&lt;/tr&gt; &lt;tr&gt;&lt;td&gt;One million (1,000,000)&lt;/td&gt;&lt;td&gt;7 seconds&lt;/td&gt;&lt;/tr&gt; &lt;tr&gt;&lt;td&gt;Ten million (10,000,000)&lt;/td&gt;&lt;td&gt;149 seconds&lt;/td&gt;&lt;/tr&gt; &lt;/tbody&gt;&lt;/table&gt;  &lt;p&gt;Fortunately, there are much better ways to obtain random rows. 
There are two basic approaches to take - we can pick randomly from a range of values, or we can store a random number inside the table itself.&lt;/p&gt;  &lt;hr /&gt;&lt;h3&gt;Range of Values&lt;/h3&gt; &lt;p&gt;Let's keep using our &lt;span style="font-weight: bold;"&gt;mydata&lt;/span&gt; table, which has a primary key of &lt;span style="font-weight: bold;"&gt;id&lt;/span&gt;. If we know enough information about a column in the database, we can use that to get random rows by picking random values of that column. In this example, all we need to know is the minimum and maximum value of the id column, and we can have an external program generate a random number between the minimum and the maximum and put it into a query:&lt;/p&gt;  &lt;div style="border: 1px solid rgb(136, 136, 136); padding: 1em; background: rgb(238, 238, 238) none repeat scroll 0% 50%; font-family: Courier,monospace; margin-right: 5em; margin-left: 1em; color: black; -moz-background-clip: -moz-initial; -moz-background-origin: -moz-initial; -moz-background-inline-policy: -moz-initial; font-weight: bold; font-size: 110%; line-height: 130%;"&gt;SELECT * FROM mydata WHERE id = '4012341';&lt;/div&gt;  &lt;p&gt;We can also have the database help us choose the number, if we know there are a maximum of 10 million ids:&lt;/p&gt;  &lt;div style="border: 1px solid rgb(136, 136, 136); padding: 1em; background: rgb(238, 238, 238) none repeat scroll 0% 50%; font-family: Courier,monospace; margin-right: 5em; margin-left: 1em; color: black; -moz-background-clip: -moz-initial; -moz-background-origin: -moz-initial; -moz-background-inline-policy: -moz-initial; font-weight: bold; font-size: 110%; line-height: 130%; white-space: pre;"&gt;SELECT * FROM mydata&lt;br /&gt;WHERE id =&lt;br /&gt;  (SELECT (RANDOM() * 10000000)::int OFFSET 0)&lt;br /&gt;LIMIT 1;&lt;/div&gt; &lt;p&gt;Both run in under a second, as the primary key column id is indexed. 
(The use of &lt;span style="background: rgb(238, 238, 238) none repeat scroll 0% 50%; font-weight: bold; -moz-background-clip: -moz-initial; -moz-background-origin: -moz-initial; -moz-background-inline-policy: -moz-initial;"&gt;OFFSET 0&lt;/span&gt; is needed in the second query to force the planner to evaluate RANDOM() only one time).&lt;/p&gt;  &lt;p&gt;There are a few problems with this approach, however. One obvious one is that the query above may fail if there are any "holes" in the range of numbers from min to max. Storing information about where the holes are is probably impractical, but we can get around it by finding the value that is &lt;span style="font-style: italic;"&gt;closest&lt;/span&gt; to the random number we picked, like this:&lt;/p&gt;  &lt;div style="border: 1px solid rgb(136, 136, 136); padding: 1em; background: rgb(238, 238, 238) none repeat scroll 0% 50%; font-family: Courier,monospace; margin-right: 5em; margin-left: 1em; color: black; -moz-background-clip: -moz-initial; -moz-background-origin: -moz-initial; -moz-background-inline-policy: -moz-initial; font-weight: bold; font-size: 110%; line-height: 130%; white-space: pre;"&gt;SELECT * FROM mydata&lt;br /&gt;WHERE id &gt;= '4012341'&lt;br /&gt;LIMIT 1;&lt;/div&gt;  &lt;p&gt;While that query addresses the problem of holes, it has two additional problems: it does not guarantee that the same row is returned each time, and it sometimes runs very, very slow. 
Running an &lt;a href="http://people.planetpostgresql.org/greg/exit.php?url_id=169&amp;entry_id=40" title="http://www.postgresql.org/docs/current/interactive/sql-explain.html" onmouseover="window.status='http://www.postgresql.org/docs/current/interactive/sql-explain.html';return true;" onmouseout="window.status='';return true;"&gt;EXPLAIN plan&lt;/a&gt; shows us why the speed difference:&lt;/p&gt;  &lt;div style="border: 1px solid rgb(136, 136, 136); padding: 1em; background: rgb(221, 238, 238) none repeat scroll 0% 50%; font-family: Courier,monospace; margin-right: 5em; margin-left: 1em; color: black; -moz-background-clip: -moz-initial; -moz-background-origin: -moz-initial; -moz-background-inline-policy: -moz-initial; font-weight: bold; font-size: 110%; line-height: 130%; white-space: pre;"&gt;         QUERY PLAN&lt;br /&gt;----------------------------------------&lt;br /&gt;Limit  (cost=0.00..0.02 rows=1)&lt;br /&gt;  -&gt;  Seq Scan on mydata &lt;br /&gt;   (cost=0.00..223040.00 rows=9996117)&lt;br /&gt;        Filter: (id &gt;= 4012341::bigint)&lt;br /&gt;&lt;/div&gt;  &lt;p&gt;The index is not being used. As a good &lt;a href="http://people.planetpostgresql.org/greg/exit.php?url_id=170&amp;entry_id=40" title="http://en.wikipedia.org/wiki/Rule_of_thumb" onmouseover="window.status='http://en.wikipedia.org/wiki/Rule_of_thumb';return true;" onmouseout="window.status='';return true;"&gt;rule of thumb&lt;/a&gt;, never use a LIMIT without an ORDER BY clause. Let's add one in, which will solve both of our problems. 
The index will be used, and the results will be predictable:&lt;/p&gt;  &lt;div style="border: 1px solid rgb(136, 136, 136); padding: 1em; background: rgb(238, 238, 238) none repeat scroll 0% 50%; font-family: Courier,monospace; margin-right: 5em; margin-left: 1em; color: black; -moz-background-clip: -moz-initial; -moz-background-origin: -moz-initial; -moz-background-inline-policy: -moz-initial; font-weight: bold; font-size: 110%; line-height: 130%; white-space: pre;"&gt;SELECT * FROM mydata&lt;br /&gt;WHERE id &gt;= '4012341'&lt;br /&gt;ORDER BY id LIMIT 1;&lt;br /&gt;&lt;/div&gt;  &lt;p&gt;This strategy of using "&gt;= (value) ORDER BY (column) LIMIT 1" is one which we will use a lot from this point forward.&lt;/p&gt;  &lt;p&gt;Another problem is that we are not guaranteed to get the number of rows that we want. For example:&lt;/p&gt;  &lt;div style="border: 1px solid rgb(136, 136, 136); padding: 1em; background: rgb(238, 238, 238) none repeat scroll 0% 50%; font-family: Courier,monospace; margin-right: 5em; margin-left: 1em; color: black; -moz-background-clip: -moz-initial; -moz-background-origin: -moz-initial; -moz-background-inline-policy: -moz-initial; font-weight: bold; font-size: 110%; line-height: 130%; white-space: pre;"&gt;SELECT * FROM mydata&lt;br /&gt;WHERE id &gt;= '9999999'&lt;br /&gt;ORDER BY id LIMIT 100;&lt;br /&gt;&lt;/div&gt;  &lt;p&gt;This will only return 2 rows since our sample data has a maximum id of ten million. There are two ways around this problem: you can re-run the query with a new random number until you get the number of random rows you need, or you can adjust your random number (or your table) to make sure that you always have at least that many. For example, if your data has 100 rows and you want to pull 10 of them at random, then make sure you never ask for an id of more than 90. 
Alternatively, you could "pad" your table with 10 extra rows, and then safely use the numbers 1-100.&lt;/p&gt;  &lt;p&gt;There is one final problem: picking our own random values from range will not produce truly random rows unless the data is perfectly uniformly distributed. Consider a table with two rows and values of 1 and 10. Our strategy above would cause the 10 value to appear more often than the 1 value, which is not the randomness we are looking for. In addition to the holes, if the values are not unique, then the distribution may not be uniform, and we once again lack true randomness. We need a way to combine the true randomness of ORDER BY RANDOM() with the speed of a Range of Values.&lt;/p&gt;  &lt;hr /&gt;&lt;h3&gt;Random Column&lt;/h3&gt; &lt;p&gt;The final and best solution is to create a new column in your database that stores random values. The table can then be sorted by this column, and get back random rows in a fast, repeatable, and truly random way. What we are basically doing is emulating the effect of ORDER BY RANDOM(), which as you recall creates a random value for each row in the database. Let's apply it to our test table.&lt;/p&gt;  &lt;p&gt;First, we create a new column to hold the random values. Since RANDOM() returns the type "double precision", we create a new column of that type. 
We'll name it &lt;span style="font-weight: bold; color: rgb(0, 102, 0);"&gt;myrand&lt;/span&gt;:&lt;/p&gt;  &lt;div style="border: 1px solid rgb(136, 136, 136); padding: 1em; background: rgb(238, 238, 238) none repeat scroll 0% 50%; font-family: Courier,monospace; margin-right: 5em; margin-left: 1em; color: black; -moz-background-clip: -moz-initial; -moz-background-origin: -moz-initial; -moz-background-inline-policy: -moz-initial; font-weight: bold; font-size: 110%; line-height: 130%;"&gt;ALTER TABLE mydata ADD myrand DOUBLE PRECISION;&lt;/div&gt;  &lt;p&gt;Now we can populate that row with a random number from 0.0 to 1.0:&lt;/p&gt;  &lt;div style="border: 1px solid rgb(136, 136, 136); padding: 1em; background: rgb(238, 238, 238) none repeat scroll 0% 50%; font-family: Courier,monospace; margin-right: 5em; margin-left: 1em; color: black; -moz-background-clip: -moz-initial; -moz-background-origin: -moz-initial; -moz-background-inline-policy: -moz-initial; font-weight: bold; font-size: 110%; line-height: 130%; white-space: pre;"&gt;BEGIN;&lt;br /&gt;UPDATE mydata SET myrand = RANDOM();&lt;br /&gt;COMMIT;&lt;br /&gt;&lt;/div&gt;  &lt;p&gt;This does take a non-trivial amount of time to run (372 seconds to populate all ten million rows), but it is a one-time cost. Since we'll be hitting this column to generate our random rows, we should put an index on it as well. But before we do that, we have to also ensure that our results are reproducible. In other words, the same query should return the same exact rows. 
Something like this is &lt;span style="font-weight: bold;"&gt;not&lt;/span&gt; guaranteed to get the same 10 rows each time it is run:&lt;/p&gt;  &lt;div style="border: 1px solid rgb(136, 136, 136); padding: 1em; background: rgb(238, 238, 238) none repeat scroll 0% 50%; font-family: Courier,monospace; margin-right: 5em; margin-left: 1em; color: black; -moz-background-clip: -moz-initial; -moz-background-origin: -moz-initial; -moz-background-inline-policy: -moz-initial; font-weight: bold; font-size: 110%; line-height: 130%; white-space: pre;"&gt;SELECT * FROM mydata&lt;br /&gt;ORDER BY myrand LIMIT 10;&lt;/div&gt;  &lt;p&gt;Why? Because there is no unique constraint on the myrand column, and it is possible (especially with our 10 million row example table) that two myrand columns contain the same value. As another rule of thumb, always make sure your ORDER BY clause specifies a unique set of rows. Our primary key, "id", is unique, so that makes a good backup for when our myrand column happens to have the same value. Our new query becomes:&lt;/p&gt;  &lt;div style="border: 1px solid rgb(136, 136, 136); padding: 1em; background: rgb(238, 238, 238) none repeat scroll 0% 50%; font-family: Courier,monospace; margin-right: 5em; margin-left: 1em; color: black; -moz-background-clip: -moz-initial; -moz-background-origin: -moz-initial; -moz-background-inline-policy: -moz-initial; font-weight: bold; font-size: 110%; line-height: 130%; white-space: pre;"&gt;SELECT * FROM mydata&lt;br /&gt;ORDER BY myrand, id LIMIT 10;&lt;br /&gt;&lt;/div&gt;  &lt;p&gt;Now we can create the index, on both of the columns in that ORDER BY. 
For good measure, we'll analyze the table as well:&lt;/p&gt;  &lt;div style="border: 1px solid rgb(136, 136, 136); padding: 1em; background: rgb(238, 238, 238) none repeat scroll 0% 50%; font-family: Courier,monospace; margin-right: 5em; margin-left: 1em; color: black; -moz-background-clip: -moz-initial; -moz-background-origin: -moz-initial; -moz-background-inline-policy: -moz-initial; font-weight: bold; font-size: 110%; line-height: 130%; white-space: pre;"&gt;CREATE INDEX myrand_randomhelp ON mydata(myrand,id);&lt;br /&gt;&lt;br /&gt;ANALYZE VERBOSE mydata;&lt;br /&gt;&lt;/div&gt;  &lt;p&gt;Before the index was in place, the query to grab a random row took over 180 seconds. Now that it is in place, the query runs in less than 1 second (126 milliseconds).&lt;/p&gt;  &lt;p&gt;So that's our basic "Random Column" strategy: assign each row a random number, make sure it is linked to another unique column, and make an index across both of them. This allows us to get fast, repeatable, and truly random rows. 
You can also ensure that new rows get a new random value automatically added to them by doing this:&lt;/p&gt;  &lt;div style="border: 1px solid rgb(136, 136, 136); padding: 1em; background: rgb(238, 238, 238) none repeat scroll 0% 50%; font-family: Courier,monospace; margin-right: 5em; margin-left: 1em; color: black; -moz-background-clip: -moz-initial; -moz-background-origin: -moz-initial; -moz-background-inline-policy: -moz-initial; font-weight: bold; font-size: 110%; line-height: 130%; white-space: pre;"&gt;ALTER TABLE mydata ALTER myrand&lt;br /&gt;SET NOT NULL DEFAULT RANDOM();&lt;br /&gt;&lt;/div&gt;  &lt;p&gt;If you don't care about repeatability, and simply want to grab a random row, you can do this:&lt;/p&gt;  &lt;div style="border: 1px solid rgb(136, 136, 136); padding: 1em; background: rgb(238, 238, 238) none repeat scroll 0% 50%; font-family: Courier,monospace; margin-right: 5em; margin-left: 1em; color: black; -moz-background-clip: -moz-initial; -moz-background-origin: -moz-initial; -moz-background-inline-policy: -moz-initial; font-weight: bold; font-size: 110%; line-height: 130%; white-space: pre;"&gt;SELECT * FROM mydata&lt;br /&gt;WHERE myrand &gt;= (SELECT RANDOM() OFFSET 0)&lt;br /&gt;ORDER BY myrand ASC LIMIT 1;&lt;br /&gt;&lt;/div&gt;  &lt;p&gt;The ORDER BY clause is needed to ensure that our index is used. Note that Postgres has no problem using our previous index we created on both columns, because we put the myrand column first inside of that index. The above query is basically what &lt;a href="http://people.planetpostgresql.org/greg/exit.php?url_id=171&amp;entry_id=40" title="http://www.wikipedia.org/" onmouseover="window.status='http://www.wikipedia.org/';return true;" onmouseout="window.status='';return true;"&gt;Wikipedia&lt;/a&gt; uses when you click on the "Random Page" link.&lt;/p&gt;  &lt;p&gt;Another advantage to using a Random Column is that not only are the results reproducible, they are resettable. 
Let's say that you are using this method to pull 100 random rows at a time out of a table with 1000 rows for statistical analysis. You also want to make sure that you never use the same row more than once, so you use an OFFSET:&lt;/p&gt;  &lt;div style="border: 1px solid rgb(136, 136, 136); padding: 1em; background: rgb(238, 238, 238) none repeat scroll 0% 50%; font-family: Courier,monospace; margin-right: 5em; margin-left: 1em; color: black; -moz-background-clip: -moz-initial; -moz-background-origin: -moz-initial; -moz-background-inline-policy: -moz-initial; font-weight: bold; font-size: 110%; line-height: 130%; white-space: pre;"&gt;SELECT * FROM mydata ORDER BY myrand, id&lt;br /&gt;  LIMIT 100 OFFSET 1;&lt;br /&gt;SELECT * FROM mydata ORDER BY myrand, id&lt;br /&gt;  LIMIT 100 OFFSET 100;&lt;br /&gt;SELECT * FROM mydata ORDER BY myrand, id&lt;br /&gt;  LIMIT 100 OFFSET 200;&lt;br /&gt;&lt;span style="font-weight: normal;"&gt;etc...&lt;/span&gt; &lt;/div&gt;  &lt;p&gt;(Note: although offset starts at 0, we ignore the first row as OFFSET 100 is easier to read than OFFSET 99). At some point, you want to run some more tests, but you don't want the same grouping as before. In other words, you want to reshuffle the deck of cards. Simple enough, just assign new values to the 'myrand' column:&lt;/p&gt;  &lt;div style="border: 1px solid rgb(136, 136, 136); padding: 1em; background: rgb(238, 238, 238) none repeat scroll 0% 50%; font-family: Courier,monospace; margin-right: 5em; margin-left: 1em; color: black; -moz-background-clip: -moz-initial; -moz-background-origin: -moz-initial; -moz-background-inline-policy: -moz-initial; font-weight: bold; font-size: 110%; line-height: 130%;"&gt;UPDATE mydata SET myrand = RANDOM();&lt;/div&gt;  &lt;p&gt;The only drawback to the whole Random Column strategy is the time and effort it takes to set it up, and the additional disk space needed to handle the extra column. 
Because of the extra column, INSERTS and UPDATES may run &lt;span style="font-style: italic;"&gt;slightly&lt;/span&gt; slower.&lt;/p&gt;  &lt;p&gt;Here's a summary of the three strategies to grab some random rows from a table:&lt;/p&gt;  &lt;table style="background: rgb(255, 255, 238) none repeat scroll 0% 50%; -moz-background-clip: -moz-initial; -moz-background-origin: -moz-initial; -moz-background-inline-policy: -moz-initial;" border="1"&gt;&lt;tbody&gt;&lt;tr&gt;&lt;th&gt;Technique&lt;/th&gt;&lt;th&gt;Pros&lt;/th&gt;&lt;th&gt;Cons&lt;/th&gt;&lt;th&gt;When to use&lt;/th&gt;&lt;/tr&gt;&lt;tr&gt;&lt;th&gt;ORDER BY RANDOM()&lt;/th&gt;&lt;td&gt;&lt;li&gt;Completely random&lt;/li&gt;&lt;li&gt;No table changes needed&lt;/li&gt;&lt;li&gt;Easy to append to existing queries&lt;/li&gt;&lt;/td&gt;&lt;td&gt;&lt;li&gt;Very slow: does not scale&lt;/li&gt;&lt;li&gt;Non-repeatable result sets&lt;/li&gt;&lt;/td&gt;&lt;td&gt;Quick ad-hoc queries and very small tables that will not grow large&lt;/td&gt;&lt;/tr&gt;&lt;tr&gt;&lt;th&gt;Range of Values&lt;/th&gt;&lt;td&gt;&lt;li&gt;Very fast&lt;/li&gt;&lt;li&gt;Uses existing columns&lt;/li&gt;&lt;/td&gt;&lt;td&gt;&lt;li&gt;Not truly random&lt;/li&gt;&lt;li&gt;Must track minimum and maximum&lt;/li&gt;&lt;li&gt;Hard to get desired number of rows&lt;/li&gt;&lt;li&gt;Inserts can affect results&lt;/li&gt;&lt;/td&gt;&lt;td&gt;When data is very well-defined and stable (even then, be cautious)&lt;/td&gt;&lt;/tr&gt;&lt;tr&gt;&lt;th&gt;Random Column&lt;/th&gt;&lt;td&gt;&lt;li&gt;Very fast&lt;/li&gt;&lt;li&gt;Truly random&lt;/li&gt;&lt;li&gt;Reproducible&lt;/li&gt;&lt;li&gt;Automatically maintained&lt;/li&gt;&lt;li&gt;Resettable&lt;/li&gt;&lt;li&gt;Column data type matches random() for easy use&lt;/li&gt;&lt;/td&gt;&lt;td&gt;&lt;li&gt;Takes up disk space&lt;/li&gt;&lt;li&gt;Initial setup cost&lt;/li&gt;&lt;li&gt;Inserts may be slowed by the extra column and the default value&lt;/li&gt;&lt;/td&gt;&lt;td&gt;Whenever 
possible&lt;/td&gt;&lt;/tr&gt;&lt;/tbody&gt;&lt;/table&gt;&lt;div class="blogger-post-footer"&gt;&lt;img width='1' height='1' src='https://blogger.googleusercontent.com/tracker/33306823-3322493445236714495?l=pylab.blogspot.com' alt='' /&gt;&lt;/div&gt;</content><link rel='replies' type='application/atom+xml' href='http://pylab.blogspot.com/feeds/3322493445236714495/comments/default' title='Post Comments'/><link rel='replies' type='text/html' href='http://www.blogger.com/comment.g?blogID=33306823&amp;postID=3322493445236714495' title='0 Comments'/><link rel='edit' type='application/atom+xml' href='http://www.blogger.com/feeds/33306823/posts/default/3322493445236714495'/><link rel='self' type='application/atom+xml' href='http://www.blogger.com/feeds/33306823/posts/default/3322493445236714495'/><link rel='alternate' type='text/html' href='http://pylab.blogspot.com/2006/12/getting-random-rows-from-database-table.html' title='Getting random rows from a database table'/><author><name>NextGenSearch</name><email>noreply@blogger.com</email><gd:image rel='http://schemas.google.com/g/2005#thumbnail' width='16' height='16' src='http://img2.blogblog.com/img/b16-rounded.gif'/></author><thr:total>0</thr:total></entry><entry><id>tag:blogger.com,1999:blog-33306823.post-2094291148535921643</id><published>2006-12-27T15:39:00.000-08:00</published><updated>2006-12-27T15:41:51.802-08:00</updated><category scheme='http://www.blogger.com/atom/ns#' term='python'/><title type='text'>index of max value in a list PYTHON</title><content type='html'>&lt;span style="font-weight:bold;"&gt; the max function, it returns the maximum value in the&lt;br /&gt; list rather than the index associated with that value.&lt;br /&gt;&lt;br /&gt; How do I return the index?&lt;/span&gt;&lt;br /&gt;&lt;pre&gt;&lt;br /&gt;l.index(max(l))&lt;br /&gt;&lt;/pre&gt;&lt;br /&gt;l.index(max(l)) will give you the index of the first occurrence of the&lt;br /&gt;maximum.&lt;br /&gt;&lt;br /&gt;m = max(l)&lt;br /&gt;[ i for 
i,v in enumerate(l) if v==m ]&lt;br /&gt;&lt;br /&gt;will give you a list of all indices where the max occurs. (Putting the&lt;br /&gt;'max(l)' outside the list comprehension prevents it from being evaluated&lt;br /&gt;for each loop element.)&lt;div class="blogger-post-footer"&gt;&lt;img width='1' height='1' src='https://blogger.googleusercontent.com/tracker/33306823-2094291148535921643?l=pylab.blogspot.com' alt='' /&gt;&lt;/div&gt;</content><link rel='replies' type='application/atom+xml' href='http://pylab.blogspot.com/feeds/2094291148535921643/comments/default' title='Post Comments'/><link rel='replies' type='text/html' href='http://www.blogger.com/comment.g?blogID=33306823&amp;postID=2094291148535921643' title='3 Comments'/><link rel='edit' type='application/atom+xml' href='http://www.blogger.com/feeds/33306823/posts/default/2094291148535921643'/><link rel='self' type='application/atom+xml' href='http://www.blogger.com/feeds/33306823/posts/default/2094291148535921643'/><link rel='alternate' type='text/html' href='http://pylab.blogspot.com/2006/12/index-of-max-value-in-list-python.html' title='index of max value in a list PYTHON'/><author><name>NextGenSearch</name><email>noreply@blogger.com</email><gd:image rel='http://schemas.google.com/g/2005#thumbnail' width='16' height='16' src='http://img2.blogblog.com/img/b16-rounded.gif'/></author><thr:total>3</thr:total></entry><entry><id>tag:blogger.com,1999:blog-33306823.post-4995042153345669795</id><published>2006-12-20T16:02:00.000-08:00</published><updated>2006-12-20T16:03:56.941-08:00</updated><category scheme='http://www.blogger.com/atom/ns#' term='postgres'/><title type='text'>autoVACUUM in postgres</title><content type='html'>&lt;span style="font-weight: bold;"&gt;stats_start_collector = on &lt;/span&gt;&lt;br /&gt;&lt;span style="font-weight: bold;"&gt;stats_row_level = on &lt;/span&gt;&lt;br /&gt;&lt;span style="font-weight: bold;"&gt;autovacuum = on  &lt;/span&gt;&lt;br /&gt;&lt;span style="font-weight: 
bold;"&gt;autovacuum_naptime = 120&lt;/span&gt;&lt;br /&gt;&lt;br /&gt;in postgresql.conf and&lt;br /&gt;kill -1 the &lt;span style="font-weight: bold;"&gt;pid for postgres&lt;br /&gt;using cat postmaster.pid&lt;/span&gt;&lt;div class="blogger-post-footer"&gt;&lt;img width='1' height='1' src='https://blogger.googleusercontent.com/tracker/33306823-4995042153345669795?l=pylab.blogspot.com' alt='' /&gt;&lt;/div&gt;</content><link rel='replies' type='application/atom+xml' href='http://pylab.blogspot.com/feeds/4995042153345669795/comments/default' title='Post Comments'/><link rel='replies' type='text/html' href='http://www.blogger.com/comment.g?blogID=33306823&amp;postID=4995042153345669795' title='0 Comments'/><link rel='edit' type='application/atom+xml' href='http://www.blogger.com/feeds/33306823/posts/default/4995042153345669795'/><link rel='self' type='application/atom+xml' href='http://www.blogger.com/feeds/33306823/posts/default/4995042153345669795'/><link rel='alternate' type='text/html' href='http://pylab.blogspot.com/2006/12/autovacuum-in-postgres.html' title='autoVACUUM in postgres'/><author><name>NextGenSearch</name><email>noreply@blogger.com</email><gd:image rel='http://schemas.google.com/g/2005#thumbnail' width='16' height='16' src='http://img2.blogblog.com/img/b16-rounded.gif'/></author><thr:total>0</thr:total></entry><entry><id>tag:blogger.com,1999:blog-33306823.post-856389123920955044</id><published>2006-12-20T15:57:00.000-08:00</published><updated>2006-12-20T15:59:54.815-08:00</updated><category scheme='http://www.blogger.com/atom/ns#' term='postgres'/><title type='text'>How to reload postgres config files without restarting db on the LIVE</title><content type='html'>you can change it on the live service and kill -1 &amp;lt;postmaster_pid&amp;gt; to make it take effect&lt;br /&gt;we should not kill postgres right&lt;br /&gt;yea, if you edit postgresql.conf and change max_connections=512 then it should not require stopping and starting .&lt;br /&gt;&lt;span 
style="font-weight: bold;"&gt;cat postmaster.pid&lt;/span&gt;&lt;br /&gt;database]$ cat postmaster.pid&lt;br /&gt;32623&lt;br /&gt;/work/database&lt;br /&gt;database]$ cat postmaster.pid&lt;br /&gt;32623&lt;br /&gt;&lt;span style="font-weight: bold;"&gt;so pid 32623 is your postmaster pid&lt;/span&gt;&lt;br /&gt;so if you kill -1 32623 &lt;br /&gt;after you have done the changes to postgresql.conf file&lt;br /&gt;&lt;span style="font-weight: bold;"&gt;that will make postmaster re-read the config file without shutting it down&lt;/span&gt;&lt;br /&gt;i.e. &lt;span style="font-weight: bold;"&gt;existing connections don't get dropped.&lt;/span&gt;&lt;br /&gt;ur sure it won't kill the db like last time&lt;br /&gt;and how to check if it has read the new postgresql.conf&lt;br /&gt;i guess if you tail -f the log file.&lt;br /&gt;the active postgresql log file (in pg_log folder) will say "reloading.."&lt;br /&gt;&lt;br /&gt;&lt;span style="font-weight: bold;"&gt;received SIGHUP, reloading configuration files&lt;/span&gt;&lt;br /&gt;if you had a really bad typo, then the database would either ignore the changes, or safely shut itself down.&lt;div class="blogger-post-footer"&gt;&lt;img width='1' height='1' src='https://blogger.googleusercontent.com/tracker/33306823-856389123920955044?l=pylab.blogspot.com' alt='' /&gt;&lt;/div&gt;</content><link rel='replies' type='application/atom+xml' href='http://pylab.blogspot.com/feeds/856389123920955044/comments/default' title='Post Comments'/><link rel='replies' type='text/html' href='http://www.blogger.com/comment.g?blogID=33306823&amp;postID=856389123920955044' title='0 Comments'/><link rel='edit' type='application/atom+xml' href='http://www.blogger.com/feeds/33306823/posts/default/856389123920955044'/><link rel='self' type='application/atom+xml' href='http://www.blogger.com/feeds/33306823/posts/default/856389123920955044'/><link rel='alternate' type='text/html' 
href='http://pylab.blogspot.com/2006/12/how-to-reload-postgres-config-files.html' title='How to reload postgres config files without restarting db on the LIVE'/><author><name>NextGenSearch</name><email>noreply@blogger.com</email><gd:image rel='http://schemas.google.com/g/2005#thumbnail' width='16' height='16' src='http://img2.blogblog.com/img/b16-rounded.gif'/></author><thr:total>0</thr:total></entry><entry><id>tag:blogger.com,1999:blog-33306823.post-1930532533347248867</id><published>2006-12-19T20:55:00.000-08:00</published><updated>2006-12-19T20:56:30.520-08:00</updated><category scheme='http://www.blogger.com/atom/ns#' term='gaim dbus'/><title type='text'>Fixing dbus problems with gaim</title><content type='html'>r&lt;strong&gt;pm -ql dbus | grep lib&lt;/strong&gt;&lt;br/&gt;/lib/dbus-1.0&lt;br/&gt;/lib/libdbus-1.so.3&lt;br/&gt;/lib/libdbus-1.so.3.2.0&lt;br/&gt;/var/lib/dbus&lt;br/&gt; cd /lib/&lt;br/&gt;&lt;strong&gt;sudo ln -s libdbus-1.so.3.2.0 libdbus-1.so.2&lt;/strong&gt;&lt;br/&gt;&lt;strong&gt;sudo ldconfig&lt;/strong&gt; &lt;br/&gt;&lt;strong&gt;gaim&lt;/strong&gt;&lt;style&gt;i{content: normal !important}&lt;/style&gt;&lt;div class="blogger-post-footer"&gt;&lt;img width='1' height='1' src='https://blogger.googleusercontent.com/tracker/33306823-1930532533347248867?l=pylab.blogspot.com' alt='' /&gt;&lt;/div&gt;</content><link rel='replies' type='application/atom+xml' href='http://pylab.blogspot.com/feeds/1930532533347248867/comments/default' title='Post Comments'/><link rel='replies' type='text/html' href='http://www.blogger.com/comment.g?blogID=33306823&amp;postID=1930532533347248867' title='0 Comments'/><link rel='edit' type='application/atom+xml' href='http://www.blogger.com/feeds/33306823/posts/default/1930532533347248867'/><link rel='self' type='application/atom+xml' href='http://www.blogger.com/feeds/33306823/posts/default/1930532533347248867'/><link rel='alternate' type='text/html' 
href='http://pylab.blogspot.com/2006/12/fixing-dbus-problems-with-gaim.html' title='Fixing dbus problems with gaim'/><author><name>NextGenSearch</name><email>noreply@blogger.com</email><gd:image rel='http://schemas.google.com/g/2005#thumbnail' width='16' height='16' src='http://img2.blogblog.com/img/b16-rounded.gif'/></author><thr:total>0</thr:total></entry><entry><id>tag:blogger.com,1999:blog-33306823.post-2429770502263770524</id><published>2006-12-17T20:56:00.001-08:00</published><updated>2006-12-17T20:56:57.076-08:00</updated><title type='text'>vi tab retab --- space to tab -- tab to space</title><content type='html'>:set tabstop=4        " Force tabs to be displayed/expanded to 4 spaces (instead of default 8).&lt;br /&gt;  :set softtabstop=4    " Make Vim treat &amp;lt;Tab&amp;gt; key as 4 spaces, but respect hard Tabs.&lt;br /&gt;  :                     "   I don't think this one will do what you want.&lt;br /&gt;  :set expandtab        " Turn Tab keypresses into spaces.  Sounds like this is happening to you.&lt;br /&gt;                        "    You can still insert real Tabs as [Ctrl]-V [Tab].&lt;br /&gt;  :set noexpandtab      " Leave Tab keys as real tabs (ASCII 9 character).&lt;br /&gt;  :1,$retab!            
" Convert all tabs to space or ASCII-9 (per "expandtab"),&lt;br /&gt;                        "   on lines 1_to_end-of-file.&lt;br /&gt;  :set shiftwidth=4     " When auto-indenting, indent by this much.&lt;br /&gt;                        "   (Use spaces/tabs per "expandtab".)&lt;br /&gt;  :help tabstop         " Find out more about this stuff.&lt;br /&gt;  :help vimrc           " Find out more about .vimrc/_vimrc :-)&lt;div class="blogger-post-footer"&gt;&lt;img width='1' height='1' src='https://blogger.googleusercontent.com/tracker/33306823-2429770502263770524?l=pylab.blogspot.com' alt='' /&gt;&lt;/div&gt;</content><link rel='replies' type='application/atom+xml' href='http://pylab.blogspot.com/feeds/2429770502263770524/comments/default' title='Post Comments'/><link rel='replies' type='text/html' href='http://www.blogger.com/comment.g?blogID=33306823&amp;postID=2429770502263770524' title='0 Comments'/><link rel='edit' type='application/atom+xml' href='http://www.blogger.com/feeds/33306823/posts/default/2429770502263770524'/><link rel='self' type='application/atom+xml' href='http://www.blogger.com/feeds/33306823/posts/default/2429770502263770524'/><link rel='alternate' type='text/html' href='http://pylab.blogspot.com/2006/12/vi-tab-retab-space-to-tab-tab-to-space.html' title='vi tab retab --- space to tab -- tab to space'/><author><name>NextGenSearch</name><email>noreply@blogger.com</email><gd:image rel='http://schemas.google.com/g/2005#thumbnail' width='16' height='16' src='http://img2.blogblog.com/img/b16-rounded.gif'/></author><thr:total>0</thr:total></entry><entry><id>tag:blogger.com,1999:blog-33306823.post-2763674875930967397</id><published>2006-12-14T13:04:00.000-08:00</published><updated>2006-12-14T13:06:30.993-08:00</updated><title type='text'>Make python use UTF8 instead of ascii</title><content type='html'>cd /usr/lib/python2.3/site-packages/&lt;br /&gt;vi sitecustomize.py&lt;br /&gt;&lt;br /&gt;(add the following lines to the file, save it)&lt;br 
/&gt;import sys, codecs&lt;br /&gt;sys.setdefaultencoding('utf-8')&lt;br /&gt;&lt;br /&gt;from&lt;br /&gt;http://plone.org/products/cmfcontentpanels/issues/1&lt;div class="blogger-post-footer"&gt;&lt;img width='1' height='1' src='https://blogger.googleusercontent.com/tracker/33306823-2763674875930967397?l=pylab.blogspot.com' alt='' /&gt;&lt;/div&gt;</content><link rel='replies' type='application/atom+xml' href='http://pylab.blogspot.com/feeds/2763674875930967397/comments/default' title='Post Comments'/><link rel='replies' type='text/html' href='http://www.blogger.com/comment.g?blogID=33306823&amp;postID=2763674875930967397' title='0 Comments'/><link rel='edit' type='application/atom+xml' href='http://www.blogger.com/feeds/33306823/posts/default/2763674875930967397'/><link rel='self' type='application/atom+xml' href='http://www.blogger.com/feeds/33306823/posts/default/2763674875930967397'/><link rel='alternate' type='text/html' href='http://pylab.blogspot.com/2006/12/make-python-use-utf8-instead-of-ascii.html' title='Make python use UTF8 instead of ascii'/><author><name>NextGenSearch</name><email>noreply@blogger.com</email><gd:image rel='http://schemas.google.com/g/2005#thumbnail' width='16' height='16' src='http://img2.blogblog.com/img/b16-rounded.gif'/></author><thr:total>0</thr:total></entry><entry><id>tag:blogger.com,1999:blog-33306823.post-1096560720718869867</id><published>2006-12-11T00:42:00.001-08:00</published><updated>2006-12-11T00:42:37.049-08:00</updated><category scheme='http://www.blogger.com/atom/ns#' term='postgres'/><title type='text'>OSCON 2005 PostgreSQL Presentations</title><content type='html'>&lt;p&gt;The &lt;a class="reference" href="http://techdocs.postgresql.org/oscon2005/index.php"&gt;OSCON 2005 PostgreSQL Presentations&lt;/a&gt; are up, but a bunch of them are in OpenOffice format. I went through the pain of installing OpenOffice to convert them to PDF. 
For completeness, I have also included the presentations that were already available as PDF.&lt;/p&gt; &lt;p&gt;Chris Browne:&lt;/p&gt; &lt;ul class="simple"&gt;&lt;li&gt;&lt;a class="reference" href="http://redivi.com/%7Ebob/oscon2005_pgsql_pdf/slony-talk.pdf"&gt;Replicating PostgreSQL Databases Using Slony-I&lt;/a&gt;&lt;/li&gt;&lt;li&gt;&lt;a class="reference" href="http://redivi.com/%7Ebob/oscon2005_pgsql_pdf/event-prop.pdf"&gt;Event Propagation in Slony-I&lt;/a&gt;&lt;/li&gt;&lt;li&gt;&lt;a class="reference" href="http://redivi.com/%7Ebob/oscon2005_pgsql_pdf/tutorial-rt.pdf"&gt;Sample Replication RT/3&lt;/a&gt;&lt;/li&gt;&lt;li&gt;&lt;a class="reference" href="http://redivi.com/%7Ebob/oscon2005_pgsql_pdf/slony-future.pdf"&gt;Upcoming Slony-I Enhancements&lt;/a&gt;&lt;/li&gt;&lt;/ul&gt; &lt;p&gt;Joe Conway:&lt;/p&gt; &lt;ul class="simple"&gt;&lt;li&gt;&lt;a class="reference" href="http://redivi.com/%7Ebob/oscon2005_pgsql_pdf/pres_oscon_2005_r1.pdf"&gt;Terrabytes of Business Intelligence&lt;/a&gt;&lt;/li&gt;&lt;/ul&gt; &lt;p&gt;Lance Obermeyer:&lt;/p&gt; &lt;ul class="simple"&gt;&lt;li&gt;&lt;a class="reference" href="http://techdocs.postgresql.org/oscon2005/lance.obermeyer/MSCS_with_postgres.pdf"&gt;Running PostgreSQL On Windows&lt;/a&gt;&lt;/li&gt;&lt;/ul&gt; &lt;p&gt;Bruce Momjian:&lt;/p&gt; &lt;ul class="simple"&gt;&lt;li&gt;&lt;a class="reference" href="http://techdocs.postgresql.org/oscon2005/bruce.momjian/features.pdf"&gt;State of PostgreSQL 8.1&lt;/a&gt;&lt;/li&gt;&lt;li&gt;&lt;a class="reference" href="http://techdocs.postgresql.org/oscon2005/bruce.momjian/win32_port.pdf"&gt;Porting PostgreSQL To Windows&lt;/a&gt;&lt;/li&gt;&lt;/ul&gt; &lt;p&gt;Aaron Thul:&lt;/p&gt; &lt;ul class="simple"&gt;&lt;li&gt;&lt;a class="reference" href="http://techdocs.postgresql.org/oscon2005/aaron.thul/pg_built_your_car.pdf"&gt;PostgreSQL Built Your Car&lt;/a&gt;&lt;/li&gt;&lt;/ul&gt; &lt;p&gt;Robert Treat:&lt;/p&gt; &lt;ul class="simple"&gt;&lt;li&gt;&lt;a class="reference" 
href="http://redivi.com/%7Ebob/oscon2005_pgsql_pdf/OSCON_Explaining_Explain_Public.pdf"&gt;Explaining Explain&lt;/a&gt;&lt;/li&gt;&lt;/ul&gt;&lt;div class="blogger-post-footer"&gt;&lt;img width='1' height='1' src='https://blogger.googleusercontent.com/tracker/33306823-1096560720718869867?l=pylab.blogspot.com' alt='' /&gt;&lt;/div&gt;</content><link rel='replies' type='application/atom+xml' href='http://pylab.blogspot.com/feeds/1096560720718869867/comments/default' title='Post Comments'/><link rel='replies' type='text/html' href='http://www.blogger.com/comment.g?blogID=33306823&amp;postID=1096560720718869867' title='0 Comments'/><link rel='edit' type='application/atom+xml' href='http://www.blogger.com/feeds/33306823/posts/default/1096560720718869867'/><link rel='self' type='application/atom+xml' href='http://www.blogger.com/feeds/33306823/posts/default/1096560720718869867'/><link rel='alternate' type='text/html' href='http://pylab.blogspot.com/2006/12/oscon-2005-postgresql-presentations.html' title='OSCON 2005 PostgreSQL Presentations'/><author><name>NextGenSearch</name><email>noreply@blogger.com</email><gd:image rel='http://schemas.google.com/g/2005#thumbnail' width='16' height='16' src='http://img2.blogblog.com/img/b16-rounded.gif'/></author><thr:total>0</thr:total></entry><entry><id>tag:blogger.com,1999:blog-33306823.post-7700791393930785060</id><published>2006-12-10T22:12:00.000-08:00</published><updated>2006-12-10T22:14:14.618-08:00</updated><category scheme='http://www.blogger.com/atom/ns#' term='python regexp'/><title type='text'>How I replace out divs in inserts to keep the layout</title><content type='html'>i.Description =re.sub(r'&lt;.div[^&gt;]*?&gt;', '', i.Description)#replaces div id tags&lt;br /&gt;&lt;br /&gt;&lt;a onblur="try {parent.deselectBloggerImageGracefully();} catch(e) {}" href="http://bp0.blogger.com/_62vafIBqOVg/RXz29TsZOlI/AAAAAAAAAAM/rYReXej15DI/s1600-h/Screenshot-Kodos+-+The+Python+Regex+Debugger.png"&gt;&lt;img style="margin: 0px auto 
10px; display: block; text-align: center; cursor: pointer;" src="http://bp0.blogger.com/_62vafIBqOVg/RXz29TsZOlI/AAAAAAAAAAM/rYReXej15DI/s400/Screenshot-Kodos+-+The+Python+Regex+Debugger.png" alt="" id="BLOGGER_PHOTO_ID_5007148418848078418" border="0" /&gt;&lt;/a&gt;&lt;div class="blogger-post-footer"&gt;&lt;img width='1' height='1' src='https://blogger.googleusercontent.com/tracker/33306823-7700791393930785060?l=pylab.blogspot.com' alt='' /&gt;&lt;/div&gt;</content><link rel='replies' type='application/atom+xml' href='http://pylab.blogspot.com/feeds/7700791393930785060/comments/default' title='Post Comments'/><link rel='replies' type='text/html' href='http://www.blogger.com/comment.g?blogID=33306823&amp;postID=7700791393930785060' title='0 Comments'/><link rel='edit' type='application/atom+xml' href='http://www.blogger.com/feeds/33306823/posts/default/7700791393930785060'/><link rel='self' type='application/atom+xml' href='http://www.blogger.com/feeds/33306823/posts/default/7700791393930785060'/><link rel='alternate' type='text/html' href='http://pylab.blogspot.com/2006/12/how-i-replace-out-divs-in-inserts-to.html' title='How I replace out divs in inserts to keep the layout'/><author><name>NextGenSearch</name><email>noreply@blogger.com</email><gd:image rel='http://schemas.google.com/g/2005#thumbnail' width='16' height='16' src='http://img2.blogblog.com/img/b16-rounded.gif'/></author><media:thumbnail xmlns:media='http://search.yahoo.com/mrss/' url='http://bp0.blogger.com/_62vafIBqOVg/RXz29TsZOlI/AAAAAAAAAAM/rYReXej15DI/s72-c/Screenshot-Kodos+-+The+Python+Regex+Debugger.png' height='72' width='72'/><thr:total>0</thr:total></entry><entry><id>tag:blogger.com,1999:blog-33306823.post-6078786598879658903</id><published>2006-12-05T22:04:00.001-08:00</published><updated>2006-12-05T22:04:33.698-08:00</updated><title type='text'>When div when span</title><content type='html'>DIV is an arbitrary *block* element. 
It can contain other block&lt;br /&gt;elements (including other DIVs). P is block element for&lt;br /&gt;paragraphs. P cannot contain other block elements; it cannot&lt;br /&gt;contain other Ps; it cannot contain DIVs. DIV does not create a&lt;br /&gt;new P. But DIV will close any P that is open.&lt;br /&gt;[color=blue]&lt;br /&gt;&gt; I tend to use SPAN because it does not generate any line break.[/color]&lt;br /&gt;&lt;br /&gt;SPAN is an *inline* element. It cannot contain a block element.&lt;br /&gt;SPAN is closed whenever the block containing it is closed.&lt;div class="blogger-post-footer"&gt;&lt;img width='1' height='1' src='https://blogger.googleusercontent.com/tracker/33306823-6078786598879658903?l=pylab.blogspot.com' alt='' /&gt;&lt;/div&gt;</content><link rel='replies' type='application/atom+xml' href='http://pylab.blogspot.com/feeds/6078786598879658903/comments/default' title='Post Comments'/><link rel='replies' type='text/html' href='http://www.blogger.com/comment.g?blogID=33306823&amp;postID=6078786598879658903' title='0 Comments'/><link rel='edit' type='application/atom+xml' href='http://www.blogger.com/feeds/33306823/posts/default/6078786598879658903'/><link rel='self' type='application/atom+xml' href='http://www.blogger.com/feeds/33306823/posts/default/6078786598879658903'/><link rel='alternate' type='text/html' href='http://pylab.blogspot.com/2006/12/when-div-when-span.html' title='When div when span'/><author><name>NextGenSearch</name><email>noreply@blogger.com</email><gd:image rel='http://schemas.google.com/g/2005#thumbnail' width='16' height='16' src='http://img2.blogblog.com/img/b16-rounded.gif'/></author><thr:total>0</thr:total></entry><entry><id>tag:blogger.com,1999:blog-33306823.post-8412357162083900814</id><published>2006-12-03T20:33:00.000-08:00</published><updated>2006-12-03T20:37:18.767-08:00</updated><title type='text'>Faster Inserts with PostgreSQL</title><content type='html'>&lt;h2 class="entryTitle"&gt;Faster Inserts with 
PostgreSQL&lt;/h2&gt;  &lt;span class="catImg"&gt;&lt;/span&gt;As I mentioned yesterday, I'm working on optimizing lots of inserts into a database, and I need solutions for different DB servers. Today I have been working with PostgreSQL. Although the PostgreSQL &lt;code&gt;COPY&lt;/code&gt; command is recommended, I can't seem to get it to work with ColdFusion, and &lt;code&gt;cfquery&lt;/code&gt;. I've tried lots of different ways of doing it. &lt;p&gt;&lt;strong&gt;Fast Insert Solution on PostgreSQL:&lt;/strong&gt;&lt;/p&gt; &lt;p&gt;So far the fastest solution I have come up with is using &lt;code&gt;PREPARE&lt;/code&gt; to create a temporary prepared statement (yes, I'm aware of &lt;code&gt;cfqueryparam&lt;/code&gt;, and this method inserts with the highest speed). So here's how you do it:&lt;/p&gt; &lt;pre&gt;PREPARE preparedInsert (int, varchar) AS&lt;br /&gt; INSERT INTO tableName (intColumn, charColumn)&lt;br /&gt; VALUES ($1, $2);&lt;br /&gt;EXECUTE preparedInsert (1,'a');&lt;br /&gt;EXECUTE preparedInsert (2,'b');&lt;br /&gt;EXECUTE preparedInsert (3,'c');&lt;br /&gt;DEALLOCATE preparedInsert;&lt;br /&gt;&lt;/pre&gt; &lt;p&gt;You're basically creating a function that allows you to pass variables to your insert statement. Inside the first set of parentheses you list the types of your variables, then variables are referred to as &lt;code&gt;$1&lt;/code&gt;, &lt;code&gt;$2&lt;/code&gt;, etc. 
inside the statement.&lt;/p&gt; &lt;p&gt;Next you can &lt;code&gt;EXECUTE&lt;/code&gt; the statement as many times as you need to (this can all be done inside one SQL statement, inside one cfquery tag).&lt;/p&gt; &lt;p&gt;Finally when I'm done, I &lt;code&gt;DEALLOCATE&lt;/code&gt; the function, otherwise if you try to &lt;code&gt;PREPARE&lt;/code&gt; a statement named &lt;code&gt;preparedInsert&lt;/code&gt; again during the same connection session you will get an error.&lt;/p&gt;&lt;div class="blogger-post-footer"&gt;&lt;img width='1' height='1' src='https://blogger.googleusercontent.com/tracker/33306823-8412357162083900814?l=pylab.blogspot.com' alt='' /&gt;&lt;/div&gt;</content><link rel='replies' type='application/atom+xml' href='http://pylab.blogspot.com/feeds/8412357162083900814/comments/default' title='Post Comments'/><link rel='replies' type='text/html' href='http://www.blogger.com/comment.g?blogID=33306823&amp;postID=8412357162083900814' title='0 Comments'/><link rel='edit' type='application/atom+xml' href='http://www.blogger.com/feeds/33306823/posts/default/8412357162083900814'/><link rel='self' type='application/atom+xml' href='http://www.blogger.com/feeds/33306823/posts/default/8412357162083900814'/><link rel='alternate' type='text/html' href='http://pylab.blogspot.com/2006/12/faster-inserts-with-postgresql.html' title='Faster Inserts with PostgreSQL'/><author><name>NextGenSearch</name><email>noreply@blogger.com</email><gd:image rel='http://schemas.google.com/g/2005#thumbnail' width='16' height='16' src='http://img2.blogblog.com/img/b16-rounded.gif'/></author><thr:total>0</thr:total></entry><entry><id>tag:blogger.com,1999:blog-33306823.post-4233495232267030350</id><published>2006-11-20T03:53:00.001-08:00</published><updated>2006-11-20T03:55:05.242-08:00</updated><title type='text'>Experimenting with functional python</title><content type='html'>http://www.bluebits.gr/weblog/programming/experimenting-with-functional-python-2006-07-26-20-11.html&lt;div 
class="blogger-post-footer"&gt;&lt;img width='1' height='1' src='https://blogger.googleusercontent.com/tracker/33306823-4233495232267030350?l=pylab.blogspot.com' alt='' /&gt;&lt;/div&gt;</content><link rel='related' href='http://www.bluebits.gr/weblog/programming/experimenting-with-functional-python-2006-07-26-20-11.html' title='Experimenting with functional python'/><link rel='replies' type='application/atom+xml' href='http://pylab.blogspot.com/feeds/4233495232267030350/comments/default' title='Post Comments'/><link rel='replies' type='text/html' href='http://www.blogger.com/comment.g?blogID=33306823&amp;postID=4233495232267030350' title='0 Comments'/><link rel='edit' type='application/atom+xml' href='http://www.blogger.com/feeds/33306823/posts/default/4233495232267030350'/><link rel='self' type='application/atom+xml' href='http://www.blogger.com/feeds/33306823/posts/default/4233495232267030350'/><link rel='alternate' type='text/html' href='http://pylab.blogspot.com/2006/11/experimenting-with-functional-python.html' title='Experimenting with functional python'/><author><name>NextGenSearch</name><email>noreply@blogger.com</email><gd:image rel='http://schemas.google.com/g/2005#thumbnail' width='16' height='16' src='http://img2.blogblog.com/img/b16-rounded.gif'/></author><thr:total>0</thr:total></entry><entry><id>tag:blogger.com,1999:blog-33306823.post-8207675593170979962</id><published>2006-11-20T03:53:00.000-08:00</published><updated>2006-11-20T03:54:58.575-08:00</updated><title type='text'>Building Web Pages with Python</title><content type='html'>http://adminspotting.net/building-web-pages-with-python/&lt;div class="blogger-post-footer"&gt;&lt;img width='1' height='1' src='https://blogger.googleusercontent.com/tracker/33306823-8207675593170979962?l=pylab.blogspot.com' alt='' /&gt;&lt;/div&gt;</content><link rel='replies' type='application/atom+xml' href='http://pylab.blogspot.com/feeds/8207675593170979962/comments/default' title='Post Comments'/><link 
rel='replies' type='text/html' href='http://www.blogger.com/comment.g?blogID=33306823&amp;postID=8207675593170979962' title='0 Comments'/><link rel='edit' type='application/atom+xml' href='http://www.blogger.com/feeds/33306823/posts/default/8207675593170979962'/><link rel='self' type='application/atom+xml' href='http://www.blogger.com/feeds/33306823/posts/default/8207675593170979962'/><link rel='alternate' type='text/html' href='http://pylab.blogspot.com/2006/11/building-web-pages-with-python.html' title='Building Web Pages with Python'/><author><name>NextGenSearch</name><email>noreply@blogger.com</email><gd:image rel='http://schemas.google.com/g/2005#thumbnail' width='16' height='16' src='http://img2.blogblog.com/img/b16-rounded.gif'/></author><thr:total>0</thr:total></entry><entry><id>tag:blogger.com,1999:blog-33306823.post-9177188403070492369</id><published>2006-10-10T20:49:00.000-07:00</published><updated>2006-10-10T20:58:24.705-07:00</updated><category scheme='http://www.blogger.com/atom/ns#' term='unicode python'/><title type='text'>Default python encoding to unicode</title><content type='html'>In file&lt;br /&gt;&lt;span style="font-weight: bold;font-family:courier new;" &gt;vim /usr/lib/python2.4/site.py&lt;/span&gt;&lt;br /&gt;Comment out line 352 and add 353&lt;br /&gt;&lt;br /&gt;&lt;span style="font-family:courier new;"&gt;352     #encoding = "ascii" # Default value set by _PyUnicode_Init()&lt;/span&gt;&lt;br /&gt;&lt;span style="font-family:courier new;"&gt;353     encoding = "utf8"&lt;/span&gt;&lt;span style="font-weight: bold;"&gt;&lt;br /&gt;&lt;br /&gt;&lt;/span&gt;to check&lt;span style="font-weight: bold;"&gt;&lt;br /&gt;&lt;/span&gt;&lt;br /&gt;&lt;span style="font-weight: bold;"&gt;ipython&lt;/span&gt;&lt;br /&gt;&lt;span style="font-family:courier new;"&gt;import sys&lt;/span&gt;&lt;br /&gt;&lt;span style="font-family:courier new;"&gt;sys.getdefaultencoding()&lt;/span&gt;&lt;br /&gt;&lt;span style="font-family:courier new;"&gt;Out[2]: 
'utf8'&lt;/span&gt;&lt;div class="blogger-post-footer"&gt;&lt;img width='1' height='1' src='https://blogger.googleusercontent.com/tracker/33306823-9177188403070492369?l=pylab.blogspot.com' alt='' /&gt;&lt;/div&gt;</content><link rel='replies' type='application/atom+xml' href='http://pylab.blogspot.com/feeds/9177188403070492369/comments/default' title='Post Comments'/><link rel='replies' type='text/html' href='http://www.blogger.com/comment.g?blogID=33306823&amp;postID=9177188403070492369' title='0 Comments'/><link rel='edit' type='application/atom+xml' href='http://www.blogger.com/feeds/33306823/posts/default/9177188403070492369'/><link rel='self' type='application/atom+xml' href='http://www.blogger.com/feeds/33306823/posts/default/9177188403070492369'/><link rel='alternate' type='text/html' href='http://pylab.blogspot.com/2006/10/default-python-encoding-to-unicode.html' title='Default python encoding to unicode'/><author><name>NextGenSearch</name><email>noreply@blogger.com</email><gd:image rel='http://schemas.google.com/g/2005#thumbnail' width='16' height='16' src='http://img2.blogblog.com/img/b16-rounded.gif'/></author><thr:total>0</thr:total></entry><entry><id>tag:blogger.com,1999:blog-33306823.post-1260796156950719917</id><published>2006-10-09T02:44:00.000-07:00</published><updated>2006-10-09T02:45:10.766-07:00</updated><title type='text'>cgi programming with python</title><content type='html'>Take a look at Guido's examples here...&lt;br /&gt;&lt;br /&gt;http://www.python.org/doc/essays/ppt/sd99east/sld057.htm&lt;br /&gt;&lt;br /&gt;Best wishes,&lt;div class="blogger-post-footer"&gt;&lt;img width='1' height='1' src='https://blogger.googleusercontent.com/tracker/33306823-1260796156950719917?l=pylab.blogspot.com' alt='' /&gt;&lt;/div&gt;</content><link rel='related' href='http://www.python.org/doc/essays/ppt/sd99east/sld057.htm' title='cgi programming with python'/><link rel='replies' type='application/atom+xml' 
href='http://pylab.blogspot.com/feeds/1260796156950719917/comments/default' title='Post Comments'/><link rel='replies' type='text/html' href='http://www.blogger.com/comment.g?blogID=33306823&amp;postID=1260796156950719917' title='0 Comments'/><link rel='edit' type='application/atom+xml' href='http://www.blogger.com/feeds/33306823/posts/default/1260796156950719917'/><link rel='self' type='application/atom+xml' href='http://www.blogger.com/feeds/33306823/posts/default/1260796156950719917'/><link rel='alternate' type='text/html' href='http://pylab.blogspot.com/2006/10/cgi-programming-with-python.html' title='cgi programming with python'/><author><name>NextGenSearch</name><email>noreply@blogger.com</email><gd:image rel='http://schemas.google.com/g/2005#thumbnail' width='16' height='16' src='http://img2.blogblog.com/img/b16-rounded.gif'/></author><thr:total>0</thr:total></entry><entry><id>tag:blogger.com,1999:blog-33306823.post-1720546708232304920</id><published>2006-09-29T22:28:00.000-07:00</published><updated>2006-09-29T22:29:22.822-07:00</updated><category scheme='http://www.blogger.com/atom/ns#' term='windows'/><title type='text'>How to disable system beep in windows</title><content type='html'>You can find and configure it under Device Manager|View|Show Hidden Devices|Non Plug and &lt;br/&gt;Play|Beep|Action|Properties|Driver, then set the "Startup Type:" to "Disabled"&lt;br/&gt;&lt;div class="blogger-post-footer"&gt;&lt;img width='1' height='1' src='https://blogger.googleusercontent.com/tracker/33306823-1720546708232304920?l=pylab.blogspot.com' alt='' /&gt;&lt;/div&gt;</content><link rel='replies' type='application/atom+xml' href='http://pylab.blogspot.com/feeds/1720546708232304920/comments/default' title='Post Comments'/><link rel='replies' type='text/html' href='http://www.blogger.com/comment.g?blogID=33306823&amp;postID=1720546708232304920' title='0 Comments'/><link rel='edit' type='application/atom+xml' 
href='http://www.blogger.com/feeds/33306823/posts/default/1720546708232304920'/><link rel='self' type='application/atom+xml' href='http://www.blogger.com/feeds/33306823/posts/default/1720546708232304920'/><link rel='alternate' type='text/html' href='http://pylab.blogspot.com/2006/09/how-to-disable-system-beep-in-windows.html' title='How to disable system beep in windows'/><author><name>NextGenSearch</name><email>noreply@blogger.com</email><gd:image rel='http://schemas.google.com/g/2005#thumbnail' width='16' height='16' src='http://img2.blogblog.com/img/b16-rounded.gif'/></author><thr:total>0</thr:total></entry></feed>
