diff --git a/PythonDocs2DocSet/create_docset.py b/PythonDocs2DocSet/create_docset.py
index a763e7c..2ec5462 100755
--- a/PythonDocs2DocSet/create_docset.py
+++ b/PythonDocs2DocSet/create_docset.py
@@ -90,6 +90,7 @@ def find_existing_file(possible):
 modindex_path = find_existing_file([
     "modindex.html",
     "py-modindex.html",
+    "np-modindex.html"
 ])
 
 genindex_path = find_existing_file([
@@ -170,24 +171,21 @@ def find_existing_file(possible):
 
         apple_ref = "//apple_ref/cpp/cat/%s" % name
         pages[href].append(apple_ref)
-
-## Collect pages from the general index
-with codecs.open(os.path.join(source_folder, genindex_path), 'r', encoding="utf-8") as f:
-    for line in f:
-        for search in re.finditer("(|, )", line):
-            href = search.group(2)
-            if not href in pages:
-                pages[href] = []
-
-
-## Collect pages from the library index
-if os.path.exists(os.path.join(source_folder, "library/index.html")):
-    with codecs.open(os.path.join(source_folder, "library/index.html"), 'r', encoding="utf-8") as f:
-        for line in f:
-            for search in re.finditer("", line):
-                href = "library/" + search.group(1)
-                if not ("http://" in href or "https://" in href or href in pages):
-                    pages[href] = []
+## Collect remaining HTML pages
+for path,_,files in os.walk ("."):
+    # Clean up path (remove "./")
+    cleanPath = re.sub (r"^./", "", path)
+    if cleanPath == ".":
+        cleanPath = ""
+    if os.path.samefile (path, dest_folder):
+        continue
+
+    # Walk through HTML files
+    for f in files:
+        if re.match (r".*\.html$", f):
+            href = os.path.join (cleanPath, f)
+            if not href in pages:
+                pages[href] = []
 
 with codecs.open(token_path, "w", encoding="utf-8" ) as tokens:
     ## Start of the tokens file
@@ -222,41 +220,45 @@ def find_existing_file(possible):
 
         ## This adds some hidden tags that makes Dash display this page's
         ## TOC on the left side of the screen, just like with iOS and OSX docs
-        toc = soup.find('div', 'sphinxsidebarwrapper').findAll("a", "reference")
-        if len(toc) > 0:
-            toc_tag = soup.new_tag("div", style="display:none;")
-            soup.body.append(toc_tag)
-            a_tag = soup.new_tag("a")
-            a_tag["name"] = "#"
-            toc_tag.append(a_tag)
-            h3_tag = soup.new_tag("h3")
-            h3_tag["class"] = "tasks"
-            h3_tag.append("TOC")
-            toc_tag.append(h3_tag)
-            ul_tag = soup.new_tag("ul")
-            ul_tag["class"] = "tooltip"
-            toc_tag.append(ul_tag)
-
-            for t in toc:
-                li_tag = soup.new_tag("li")
-                li_tag["class"] = "tooltip"
-                ul_tag.append(li_tag)
-                a_tag = soup.new_tag("a")
-                a_tag["href"] = t['href']
-                a_tag.append(t.text)
-                li_tag.append(a_tag)
-
-        if len(names) > 0:
-            tokens.write("<File path=\"%s\">\n" % href)
-            for name in names:
-                tokens.write("\t<Token><TokenIdentifier>%s</TokenIdentifier><Anchor>%s</Anchor></Token>\n" % (name, name))
-            tokens.write("</File>\n")
-
-            newFilePath = os.path.join(dest_folder, href)
-            if not os.path.exists(os.path.dirname(newFilePath)):
-                os.makedirs(os.path.dirname(newFilePath)) # might be a bug...if given something/test.html, it creates test.html as a directory!
-            with codecs.open(newFilePath, "w", encoding="utf-8") as newFile:
-                newFile.write(unicode(soup))
+        tocdiv = soup.find('div', 'sphinxsidebarwrapper')
+        if tocdiv is None:
+            tocdiv = soup.find('div', 'sphinxsidebar')
+
+        if tocdiv is not None:
+            toc = tocdiv.findAll("a", "reference")
+            if len(toc) > 0:
+                toc_tag = soup.new_tag("div", style="display:none;")
+                soup.body.append(toc_tag)
+                a_tag = soup.new_tag("a")
+                a_tag["name"] = "#"
+                toc_tag.append(a_tag)
+                h3_tag = soup.new_tag("h3")
+                h3_tag["class"] = "tasks"
+                h3_tag.append("TOC")
+                toc_tag.append(h3_tag)
+                ul_tag = soup.new_tag("ul")
+                ul_tag["class"] = "tooltip"
+                toc_tag.append(ul_tag)
+
+                for t in toc:
+                    li_tag = soup.new_tag("li")
+                    li_tag["class"] = "tooltip"
+                    ul_tag.append(li_tag)
+                    a_tag = soup.new_tag("a")
+                    a_tag["href"] = t['href']
+                    a_tag.append(t.text)
+                    li_tag.append(a_tag)
+
+        tokens.write("<File path=\"%s\">\n" % href)
+        for name in names:
+            tokens.write("\t<Token><TokenIdentifier>%s</TokenIdentifier><Anchor>%s</Anchor></Token>\n" % (name, name))
+        tokens.write("</File>\n")
+
+        newFilePath = os.path.join(dest_folder, href)
+        if not os.path.exists(os.path.dirname(newFilePath)):
+            os.makedirs(os.path.dirname(newFilePath)) # might be a bug...if given something/test.html, it creates test.html as a directory!
+        with codecs.open(newFilePath, "w", encoding="utf-8") as newFile:
+            newFile.write(unicode(soup))
 
     tokens.write("</Tokens>")
 
@@ -274,3 +276,6 @@ def find_existing_file(possible):
 os.remove(os.path.join(docset_folder, "Contents/Resources/Tokens.xml"))
 
 print("done")
+print("")
+print("You might have to manually add missing references (images, ...) as they are not automatically detected.")
+print("It is also a good practice to remove additional elements, such as headers, sidebars, and so on.")
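
Aside, not part of the patch: both scripts emit the Tokens.xml layout that docsetutil indexes, namely one <File> element per documentation page holding <Token> entries whose TokenIdentifier is an apple_ref and whose Anchor is an element id inside that page. A minimal sketch of that layout, with an invented page path, apple_ref type, and entry name:

    # Illustrative sketch only: the page path, apple_ref type and names are invented.
    import codecs

    with codecs.open("Tokens.xml", "w", encoding="utf-8") as tokens:
        tokens.write('<?xml version="1.0" encoding="UTF-8"?>\n<Tokens version="1.0">\n')
        tokens.write('<File path="library/example.html">\n')
        tokens.write('\t<Token><TokenIdentifier>//apple_ref/cpp/func/example.function'
                     '</TokenIdentifier><Anchor>example.function</Anchor></Token>\n')
        tokens.write('</File>\n')
        tokens.write('</Tokens>')
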
diff --git a/PythonDocs2DocSet/create_docset_json.py b/PythonDocs2DocSet/create_docset_json.py
new file mode 100755
index 0000000..16cfce2
--- /dev/null
+++ b/PythonDocs2DocSet/create_docset_json.py
@@ -0,0 +1,330 @@
+#!/usr/bin/env python
+# encoding: utf-8
+
+import re
+import os
+import shutil
+import subprocess
+import os.path
+import codecs
+import json
+from xml.sax.saxutils import escape
+from bs4 import BeautifulSoup
+
+## Tries to find docsetutil
+possible_docsetutil_path = [
+    "/Developer/usr/bin/docsetutil",
+    "/Applications/Xcode.app/Contents/Developer/usr/bin/docsetutil",
+]
+docsetutil_path = [path
+                   for path in possible_docsetutil_path
+                   if os.path.exists(path)]
+if len(docsetutil_path) == 0:
+    print ("Could not find docsetutil. Please check for docsetutil's "
+           "location and set it inside the script.")
+    exit(1)
+
+docsetutil_path = docsetutil_path[0]
+
+## Script should run in the folder where the docs live
+source_folder = os.getcwd()
+
+## Find the Python version of the docs
+python_version = None
+with codecs.open(os.path.join(source_folder, "index.html"), 'r', encoding="utf-8") as f:
+    for line in f:
+        search = re.search("dash; (.*?) documentation", line)
+        if search:
+            python_version = search.group(1)
+            break
+        search = re.search(".*?dash; (.*? v[^ <]+) ", line)
+        if search:
+            python_version = search.group(1)
+            break
+
+if python_version == None:
+    print ("I could not find Python's version in the index.html "
+           "file. Are you in the right folder??")
+    exit(1)
+
+docset_name = python_version.strip().lower().replace(" ", "_")
+dest_folder = os.path.join(source_folder, ("%s.docset/" % docset_name))
+
+def find_existing_file(possible):
+    path = [path for path in possible if os.path.exists(os.path.join(source_folder, path))]
+    if len(path) == 0:
+        print ("Could not find %s. Please check your doc folder structure and "
+               "try again." % " or ".join(possible))
+        raise Exception()
+    return path[0]
+
+## Clean up first
+if os.path.exists(dest_folder):
+    shutil.rmtree(dest_folder)
+
+## Create all the necessary folder hierarchy
+os.makedirs(dest_folder + "Contents/Resources/Documents/")
+docset_folder = dest_folder
+dest_folder = os.path.join(dest_folder, "Contents")
+
+searchindex_path = find_existing_file ([
+    "searchindex.json"
+])
+
+genindex_path = find_existing_file([
+    "genindex-all.html",
+    "genindex.html",
+])
+
+## Create Info.plist
+with codecs.open(os.path.join(dest_folder, "Info.plist"), "w", encoding="utf-8") as info:
+    info.write("""<?xml version="1.0" encoding="UTF-8"?>
+    <!DOCTYPE plist PUBLIC "-//Apple//DTD PLIST 1.0//EN" "http://www.apple.com/DTDs/PropertyList-1.0.dtd">
+    <plist version="1.0">
+    <dict>
+        <key>CFBundleIdentifier</key>
+        <string>python.%s</string>
+        <key>CFBundleName</key>
+        <string>%s</string>
+        <key>DocSetPlatformFamily</key>
+        <string>python</string>
+    </dict>
+    </plist>
+    """ % (python_version.strip().lower().replace(" ", "."), python_version.strip()))
+
+## Create Nodes.xml
+dest_folder = os.path.join(dest_folder, "Resources")
+nodes = codecs.open(os.path.join(dest_folder, "Nodes.xml"), "w", encoding="utf-8")
+nodes.write("""<?xml version="1.0" encoding="UTF-8"?>
+<DocSetNodes version="1.0">
+    <TOC>
+        <Node type="folder">
+            <Name>Modules Index</Name>
+            <Path>%s</Path>
+        </Node>
+    </TOC>
+</DocSetNodes>
+""" % genindex_path)
+
+## Create the tokens file
+token_path = os.path.join(dest_folder, "Tokens.xml")
+dest_folder = os.path.join(dest_folder, "Documents")
+
+## Collect all files in folder
+for path,_,files in os.walk ("."):
+    # Clean up path (remove "./")
+    cleanPath = re.sub (r"^./", "", path)
+    if cleanPath == ".":
+        cleanPath = ""
+    if os.path.samefile (os.path.commonprefix([os.path.abspath(path), docset_folder]), docset_folder):
+        continue
+    if cleanPath != "":
+        os.makedirs (os.path.join (dest_folder, cleanPath))
+
+    # Walk through files
+    for f in files:
+        href = os.path.join (cleanPath, f)
+        print "copying {0} -> {1}".format (href, os.path.join (dest_folder, cleanPath))
+        shutil.copy (href, os.path.join (dest_folder, cleanPath))
+
+## I'll hide the header because it makes no sense in a docset
+## and messes up Dash
+with codecs.open(os.path.join(dest_folder, "_static/basic.css"), "a+", encoding="utf-8") as css:
+    css.write("div.related {display:none;}\n")
+    css.write("div.sphinxsidebar {display:none;}\n")
+
+with codecs.open(os.path.join(dest_folder, "_static/default.css"), "a+", encoding="utf-8") as css:
+    css.write("a.headerlink {display:none;}\n")
+    css.write("div.bodywrapper {margin: 0 0 0 0px;}")
+
+searchindex = None
+with codecs.open(searchindex_path, "r", encoding="utf-8") as sifd:
+    searchindex = json.load (sifd)
+
+funtypenames = ["Module", "Class", "Method", "Class method", "Function", "Exception", "Attribute"]
+funtypedef = ["cat", "cl", "clm", "clm", "func", "cl", "instp"]
+funconv = {}
+
+print ("Available object types :")
+for ft,z in zip (funtypenames, range (len (funtypenames))):
+    print (" {0}: {1}".format (ft, z+1))
+
+print ("Detected object types :")
+for dty in searchindex['objtypes']:
+    stype = searchindex['objtypes'][dty].split (":")[-1]
+
+    # Do some guesswork for common types
+    if stype == 'class':
+        funconv[dty] = "cl"
+    elif stype == 'module':
+        funconv[dty] = "cat"
+    elif stype == 'data':
+        funconv[dty] = "instp"
+    elif stype == 'function':
+        funconv[dty] = "func"
+    elif stype == 'method':
+        funconv[dty] = "clm"
+    elif stype == 'exception':
+        funconv[dty] = "cl"
+    elif stype == 'attribute':
+        funconv[dty] = 'instp'
+    elif stype == 'staticmethod':
+        funconv[dty] = 'clm'
+    elif stype == 'member':
+        funconv[dty] = 'instp'
+    elif stype == 'type':
+        funconv[dty] = 'cl'
+    elif stype == 'var':
+        funconv[dty] = 'instp'
+    elif stype == 'macro':
+        funconv[dty] = 'func'
+
+    # Else, ask the user
+    else:
+        i = raw_input ("Enter type (1-{0}) for '{1}': ".format (len(funtypenames), searchindex['objtypes'][dty]))
+        funconv[dty] = funtypedef[int(i)-1]
+
+with codecs.open(token_path, "w", encoding="utf-8" ) as tokens:
+    ## Start of the tokens file
+    tokens.write("""<?xml version="1.0" encoding="UTF-8"?>
+    <Tokens version="1.0">
+    """)
+
+    count = 0
+    for filename in searchindex['filenames']:
+        print ("adding file {0}".format (filename))
+        tokens.write("<File path=\"%s.html\">\n" % filename)
+
+        # Open file
+        with codecs.open (filename+".html", "r", encoding="utf-8") as tmp:
+            # Read HTML structure
+            soup = BeautifulSoup (tmp)
+
+            ## This adds some hidden tags that makes Dash display this page's
+            ## TOC on the left side of the screen, just like with iOS and OSX docs
+            tocdiv = None
+            if tocdiv is None:
+                tocdiv = soup.find('div', 'sphinxsidebarwrapper')
+            if tocdiv is None:
+                tocdiv = soup.find('div', 'sphinxsidebar')
+
+            if tocdiv is not None:
+                toc = tocdiv.findAll("a", "reference")
+                if len(toc) > 0:
+                    toc_tag = soup.new_tag("div", style="display:none;")
+                    soup.body.append(toc_tag)
+                    a_tag = soup.new_tag("a")
+                    a_tag["name"] = "#"
+                    toc_tag.append(a_tag)
+                    h3_tag = soup.new_tag("h3")
+                    h3_tag["class"] = "tasks"
+                    h3_tag.append("TOC")
+                    toc_tag.append(h3_tag)
+                    ul_tag = soup.new_tag("ul")
+                    ul_tag["class"] = "tooltip"
+                    toc_tag.append(ul_tag)
+
+                    for t in toc:
+                        li_tag = soup.new_tag("li")
+                        li_tag["class"] = "tooltip"
+                        ul_tag.append(li_tag)
+                        a_tag = soup.new_tag("a")
+                        a_tag["href"] = t['href']
+                        a_tag.append(t.text)
+                        li_tag.append(a_tag)
+
+            # ...and write that inside the HTML file
+            with codecs.open(os.path.join (dest_folder, filename+".html"), "w", encoding="utf-8") as newFile:
+                newFile.write(unicode(soup))
+
+        # Look inside each module for elements that belong to this file
+        for mod in searchindex['objects']:
+            for f in searchindex['objects'][mod]:
+                # Location of the keyword
+                dat = searchindex['objects'][mod][f]
+
+                # Type of the keyword
+                fty = funconv[str(dat[1])]
+
+                # Use point-separated names for Python docs
+                if mod != '':
+                    f = mod + "." + f
+
+                # If this keyword belongs to this file, then go on
+                if dat[0] == count:
+                    # Try to find anchor with the same name as the keyword
+                    anchor = None
+
+                    # Try for exact match
+                    tabN = soup.findAll (lambda x:('id' in x.attrs and f == x.attrs['id']))
+
+                    # If not, try for lower-case match
+                    if len(tabN) == 0:
+                        tabL = soup.findAll (lambda x:('id' in x.attrs and f.lower() == x.attrs['id']))
+                        if len(tabL) > 0:
+                            anchor = f.lower()
+
+                        # If not, try to guess alternative matches
+                        else:
+                            tabA = soup.findAll (lambda x:('dt' == x.name and 'id' in x.attrs and (f in x.attrs['id'].lower() or f.lower() in x.attrs['id'].lower())))
+                            if len(tabA) == 0:
+                                print "Warning: Did not find {0} in {1}".format (f, filename+".html")
+                            else:
+                                for _fta in tabA:
+                                    _fa = _fta.attrs['id']
+                                    g1 = re.match (r"^(?P<ftype>([^\s]+\s+)*?)([^\s\(\)]+::)*(?P<fname>[^\s\(\):\<\>]+)(\<[^\<\>]*\>)?\(.*\)[^\(\)]*$", _fa)
+                                    g2 = re.match (r"^(?P<ftype>([^\s]+\s+)*?)([^\s\(\)]+::)*(?P<fname>[^\s\(\):\<\>]+)$", _fa)
+                                    g3 = re.match (r"^(?P<fpackage>([^\.]+\.)*?)(?P<fname>[^\s\(\):\.\<\>]+)$", _fa)
+                                    if (g1 is not None and g1.group('fname') == f) or (g2 is not None and g2.group('fname') == f) or (g3 is not None and g3.group('fname') == f):
+                                        anchor = _fa
+                                        print "Warning: Did not find exact match for {0} in {1}, defaulting to {2}".format (f, filename+".html", anchor)
+                                        break
+                                print "Warning: Did not find any match for {0} in {1}".format (f, filename+".html")
+                    else:
+                        anchor = f
+
+                    # See if this is a full C(++) function description, in which case extract the name
+                    name = f
+                    if ' ' in f or '(' in f or ')' in f or ':' in f:
+                        g = re.match (r"^(?P<ftype>([^\s]+\s+)*?)([^\s\(\)]+::)*(?P<fname>[^\s\(\):\<\>]+)(\<[^\<\>]*\>)?\(.*\)[^\(\)]*$", f)
+                        if g is None:
+                            # Match variable name
+                            g = re.match (r"^(?P<ftype>([^\s]+\s+)*?)([^\s\(\)]+::)*(?P<fname>[^\s\(\):\<\>]+)$", f)
+                            if g is None:
+                                continue
+                            else:
+                                name = g.group('fname')
+                        else:
+                            name = g.group('fname')
+
+
+                    # Write identifier
+                    name = escape (name)
+                    tokens.write ("\t<Token><TokenIdentifier>//apple_ref/cpp/%s/%s</TokenIdentifier>" % (fty, name))
+
+                    # Write anchor if needed
+                    if anchor is not None:
+                        anchor = escape (anchor)
+                        tokens.write ("<Anchor>%s</Anchor>" % anchor)
+
+                    # ...and finalize
+                    tokens.write ("</Token>\n")
+
+        count = count + 1
+        tokens.write("</File>\n")
+    tokens.write("</Tokens>")
+
+try:
+
+    print("calling docsetutil")
+    subprocess.call([docsetutil_path, "index", docset_folder])
+
+except OSError as e:
+
+    print("something went wrong trying to call docsetutil: ", e)
+
+## Cleanup
+os.remove(os.path.join(docset_folder, "Contents/Resources/Nodes.xml"))
+os.remove(os.path.join(docset_folder, "Contents/Resources/Tokens.xml"))
+
+print ("done")
diff --git a/PythonDocs2DocSet/create_docset_json.sh b/PythonDocs2DocSet/create_docset_json.sh
new file mode 100755
index 0000000..bd1be67
--- /dev/null
+++ b/PythonDocs2DocSet/create_docset_json.sh
@@ -0,0 +1,20 @@
+#!/bin/bash
+
+# Base Script File (convert.sh)
+# Created: Wed Mar 14 01:20:32 2012
+# Version: 1.0
+# Author: François-Xavier Thomas <fx.thomas@gmail.com>
+#
+# This Bash script was developed by François-Xavier Thomas.
+# You are free to copy, adapt or modify it.
+# If you do so, however, leave my name somewhere in the credits, I'd appreciate it ;)
+
+JS_DICT=$(cat searchindex.js | sed 's/Search\.setIndex(\(.*\))/\1/g')
+
+echo "o = $JS_DICT;" >> _tmp.js
+echo "process.stdout.write (JSON.stringify (o));" >> _tmp.js
+
+node _tmp.js > searchindex.json
+rm _tmp.js
+
+python `dirname $0`/create_docset_json.py
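
Aside, not part of the patch: create_docset_json.sh only needs node to turn the Search.setIndex(...) payload of searchindex.js into plain JSON. A rough pure-Python sketch of that one step, assuming the payload is already valid JSON (not guaranteed for every Sphinx version, which is presumably why the shell script goes through node):

    # Sketch of the searchindex.js -> searchindex.json conversion; assumes the
    # Search.setIndex(...) argument parses as JSON.
    import json
    import re

    with open("searchindex.js") as f:
        raw = f.read()

    match = re.search(r"Search\.setIndex\((.*)\)", raw, re.S)
    if match is None:
        raise SystemExit("searchindex.js does not look like a Sphinx search index")

    with open("searchindex.json", "w") as out:
        json.dump(json.loads(match.group(1)), out)

Either way, create_docset_json.py only expects a searchindex.json file next to the HTML sources, so the conversion step can be swapped out freely.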