Files
openlayers/tools/exampleparser.py
Tim Schaub cadb20c18a Use clones of git repos for site building.
The svn interface to our repos on git is not behaving well.  This is resulting in the website, examples, and api docs not being updated when they should (`svn up` is not doing what it should).

Instead of using svn checkouts, we should use clones of the git repos.  At the same time, it makes sense to organize things a bit.  These changes depend on a new structure on the openlayers.org machine:

    /osgeo/openlayers/repos - clones of our openlayers, docs, and website repos
    /osgeo/openlayers/sites - sources for openlayers.org, dev.openlayers.org, and docs.openlayers.org

Where things are "built" (e.g. minifying js and modifying example markup to point to that js), changes are now made outside of the repo cleans.  Having dirty repos was another source of build failures.

Changes still need to be made to the release process/script.
2012-05-10 01:03:54 -06:00

252 lines
7.6 KiB
Python
Executable File

#!/usr/bin/env python
import sys
import os
import re
import time
from xml.dom.minidom import Document
try:
import xml.etree.ElementTree as ElementTree
except ImportError:
try:
import cElementTree as ElementTree
except ImportError:
try:
import elementtree.ElementTree as ElementTree
except ImportError:
import lxml.etree as ElementTree
missing_deps = False
try:
import json
except ImportError:
try:
import simplejson as json
except ImportError, E:
missing_deps = E
try:
from BeautifulSoup import BeautifulSoup
except ImportError, E:
missing_deps = E
feedName = "example-list.xml"
feedPath = "http://openlayers.org/dev/examples/"
def getListOfExamples(relPath):
"""
returns list of .html filenames within a given path - excludes example-list.html
"""
examples = os.listdir(relPath)
examples = [example for example in examples if example.endswith('.html') and example != "example-list.html"]
return examples
def getExampleHtml(path):
"""
returns html of a specific example
"""
print '.',
f = open(path)
html = f.read()
f.close()
return html
def extractById(soup, tagId, value=None):
"""
returns full contents of a particular tag id
"""
beautifulTag = soup.find(id=tagId)
if beautifulTag:
if beautifulTag.contents:
value = str(beautifulTag.renderContents()).strip()
value = value.replace('\t','')
value = value.replace('\n','')
return value
def getRelatedClasses(html):
"""
parses the html, and returns a list of all OpenLayers Classes
used within (ie what parts of OL the javascript uses).
"""
rawstr = r'''(?P<class>OpenLayers\..*?)\('''
return re.findall(rawstr, html)
def parseHtml(html,ids):
"""
returns dictionary of items of interest
"""
soup = BeautifulSoup(html)
d = {}
for tagId in ids:
d[tagId] = extractById(soup,tagId)
#classes should eventually be parsed from docs - not automatically created.
classes = getRelatedClasses(html)
d['classes'] = classes
return d
def getGitInfo(exampleDir, exampleName):
orig = os.getcwd()
os.chdir(exampleDir)
h = os.popen("git log -n 1 --pretty=format:'%an|%ai' " + exampleName)
os.chdir(orig)
log = h.read()
h.close()
d = {}
parts = log.split("|")
d["author"] = parts[0]
# compensate for spaces in git log time
td = parts[1].split(" ")
td.insert(1, "T")
d["date"] = "".join(td)
return d
def createFeed(examples):
doc = Document()
atomuri = "http://www.w3.org/2005/Atom"
feed = doc.createElementNS(atomuri, "feed")
feed.setAttribute("xmlns", atomuri)
title = doc.createElementNS(atomuri, "title")
title.appendChild(doc.createTextNode("OpenLayers Examples"))
feed.appendChild(title)
link = doc.createElementNS(atomuri, "link")
link.setAttribute("rel", "self")
link.setAttribute("href", feedPath + feedName)
modtime = time.strftime("%Y-%m-%dT%I:%M:%SZ", time.gmtime())
id = doc.createElementNS(atomuri, "id")
id.appendChild(doc.createTextNode("%s%s#%s" % (feedPath, feedName, modtime)))
feed.appendChild(id)
updated = doc.createElementNS(atomuri, "updated")
updated.appendChild(doc.createTextNode(modtime))
feed.appendChild(updated)
examples.sort(key=lambda x:x["modified"])
for example in sorted(examples, key=lambda x:x["modified"], reverse=True):
entry = doc.createElementNS(atomuri, "entry")
title = doc.createElementNS(atomuri, "title")
title.appendChild(doc.createTextNode(example["title"] or example["example"]))
entry.appendChild(title)
tags = doc.createElementNS(atomuri, "tags")
tags.appendChild(doc.createTextNode(example["tags"] or example["example"]))
entry.appendChild(tags)
link = doc.createElementNS(atomuri, "link")
link.setAttribute("href", "%s%s" % (feedPath, example["example"]))
entry.appendChild(link)
summary = doc.createElementNS(atomuri, "summary")
summary.appendChild(doc.createTextNode(example["shortdesc"] or example["example"]))
entry.appendChild(summary)
updated = doc.createElementNS(atomuri, "updated")
updated.appendChild(doc.createTextNode(example["modified"]))
entry.appendChild(updated)
author = doc.createElementNS(atomuri, "author")
name = doc.createElementNS(atomuri, "name")
name.appendChild(doc.createTextNode(example["author"]))
author.appendChild(name)
entry.appendChild(author)
id = doc.createElementNS(atomuri, "id")
id.appendChild(doc.createTextNode("%s%s#%s" % (feedPath, example["example"], example["modified"])))
entry.appendChild(id)
feed.appendChild(entry)
doc.appendChild(feed)
return doc
def wordIndex(examples):
"""
Create an inverted index based on words in title and shortdesc. Keys are
lower cased words. Values are dictionaries with example index keys and
count values.
"""
index = {}
unword = re.compile("\\W+")
keys = ["shortdesc", "title", "tags"]
for i in range(len(examples)):
for key in keys:
text = examples[i][key]
if text:
words = unword.split(text)
for word in words:
if word:
word = word.lower()
if index.has_key(word):
if index[word].has_key(i):
index[word][i] += 1
else:
index[word][i] = 1
else:
index[word] = {i: 1}
return index
if __name__ == "__main__":
if missing_deps:
print "This script requires json or simplejson and BeautifulSoup. You don't have them. \n(%s)" % E
sys.exit()
if len(sys.argv) == 3:
inExampleDir = sys.argv[1]
outExampleDir = sys.argv[2]
else:
inExampleDir = "../examples"
outExampleDir = "../examples"
outFile = open(os.path.join(outExampleDir, "example-list.js"), "w")
print 'Reading examples from %s and writing out to %s' % (inExampleDir, outFile.name)
exampleList = []
docIds = ['title','shortdesc','tags']
examples = getListOfExamples(inExampleDir)
modtime = time.strftime("%Y-%m-%dT%I:%M:%SZ", time.gmtime())
for example in examples:
path = os.path.join(inExampleDir, example)
html = getExampleHtml(path)
tagvalues = parseHtml(html,docIds)
tagvalues['example'] = example
# add in author/date info
d = getGitInfo(inExampleDir, example)
tagvalues["author"] = d["author"] or "anonymous"
tagvalues["modified"] = d["date"] or modtime
tagvalues['link'] = example
exampleList.append(tagvalues)
print
exampleList.sort(key=lambda x:x['example'].lower())
index = wordIndex(exampleList)
json = json.dumps({"examples": exampleList, "index": index})
#give the json a global variable we can use in our js. This should be replaced or made optional.
json = 'var info=' + json
outFile.write(json)
outFile.close()
outFeedPath = os.path.join(outExampleDir, feedName);
print "writing feed to %s " % outFeedPath
atom = open(outFeedPath, 'w')
doc = createFeed(exampleList)
atom.write(doc.toxml())
atom.close()
print 'complete'