diff --git a/PythonDocs2DocSet/create_docset.py b/PythonDocs2DocSet/create_docset.py
index a763e7c..2ec5462 100755
--- a/PythonDocs2DocSet/create_docset.py
+++ b/PythonDocs2DocSet/create_docset.py
@@ -90,6 +90,7 @@ def find_existing_file(possible):
modindex_path = find_existing_file([
"modindex.html",
"py-modindex.html",
+ "np-modindex.html"
])
genindex_path = find_existing_file([
@@ -170,24 +171,21 @@ def find_existing_file(possible):
apple_ref = "//apple_ref/cpp/cat/%s" % name
pages[href].append(apple_ref)
-
-## Collect pages from the general index
-with codecs.open(os.path.join(source_folder, genindex_path), 'r', encoding="utf-8") as f:
- for line in f:
-        for search in re.finditer("(<dt>|, )<a href=\"(.*?)#", line):
- href = search.group(2)
- if not href in pages:
- pages[href] = []
-
-
-## Collect pages from the library index
-if os.path.exists(os.path.join(source_folder, "library/index.html")):
- with codecs.open(os.path.join(source_folder, "library/index.html"), 'r', encoding="utf-8") as f:
- for line in f:
-            for search in re.finditer("<a [^>]*href=\"(.*?)\">", line):
- href = "library/" + search.group(1)
- if not ("http://" in href or "https://" in href or href in pages):
- pages[href] = []
+## Collect remaining HTML pages
+for path,_,files in os.walk ("."):
+ # Clean up path (remove "./")
+ cleanPath = re.sub (r"^\./", "", path)
+ if cleanPath == ".":
+ cleanPath = ""
+ if os.path.samefile (path, dest_folder):
+ continue
+
+ # Walk through HTML files
+ for f in files:
+ if re.match (r".*\.html$", f):
+ href = os.path.join (cleanPath, f)
+ if not href in pages:
+ pages[href] = []
with codecs.open(token_path, "w", encoding="utf-8" ) as tokens:
## Start of the tokens file
@@ -222,41 +220,45 @@ def find_existing_file(possible):
## This adds some hidden tags that makes Dash display this page's
## TOC on the left side of the screen, just like with iOS and OSX docs
- toc = soup.find('div', 'sphinxsidebarwrapper').findAll("a", "reference")
- if len(toc) > 0:
- toc_tag = soup.new_tag("div", style="display:none;")
- soup.body.append(toc_tag)
- a_tag = soup.new_tag("a")
- a_tag["name"] = "#"
- toc_tag.append(a_tag)
- h3_tag = soup.new_tag("h3")
- h3_tag["class"] = "tasks"
- h3_tag.append("TOC")
- toc_tag.append(h3_tag)
- ul_tag = soup.new_tag("ul")
- ul_tag["class"] = "tooltip"
- toc_tag.append(ul_tag)
-
- for t in toc:
- li_tag = soup.new_tag("li")
- li_tag["class"] = "tooltip"
- ul_tag.append(li_tag)
- a_tag = soup.new_tag("a")
- a_tag["href"] = t['href']
- a_tag.append(t.text)
- li_tag.append(a_tag)
-
- if len(names) > 0:
- tokens.write("<File path=\"%s\">\n" % href)
- for name in names:
- tokens.write("\t<Token><TokenIdentifier>%s</TokenIdentifier><Anchor>%s</Anchor></Token>\n" % (name, name))
- tokens.write("</File>\n")
-
- newFilePath = os.path.join(dest_folder, href)
- if not os.path.exists(os.path.dirname(newFilePath)):
- os.makedirs(os.path.dirname(newFilePath)) # might be a bug...if given something/test.html, it creates test.html as a directory!
- with codecs.open(newFilePath, "w", encoding="utf-8") as newFile:
- newFile.write(unicode(soup))
+ tocdiv = soup.find('div', 'sphinxsidebarwrapper')
+ if tocdiv is None:
+ tocdiv = soup.find('div', 'sphinxsidebar')
+
+ if tocdiv is not None:
+ toc = tocdiv.findAll("a", "reference")
+ if len(toc) > 0:
+ toc_tag = soup.new_tag("div", style="display:none;")
+ soup.body.append(toc_tag)
+ a_tag = soup.new_tag("a")
+ a_tag["name"] = "#"
+ toc_tag.append(a_tag)
+ h3_tag = soup.new_tag("h3")
+ h3_tag["class"] = "tasks"
+ h3_tag.append("TOC")
+ toc_tag.append(h3_tag)
+ ul_tag = soup.new_tag("ul")
+ ul_tag["class"] = "tooltip"
+ toc_tag.append(ul_tag)
+
+ for t in toc:
+ li_tag = soup.new_tag("li")
+ li_tag["class"] = "tooltip"
+ ul_tag.append(li_tag)
+ a_tag = soup.new_tag("a")
+ a_tag["href"] = t['href']
+ a_tag.append(t.text)
+ li_tag.append(a_tag)
+
+ tokens.write("<File path=\"%s\">\n" % href)
+ for name in names:
+ tokens.write("\t<Token><TokenIdentifier>%s</TokenIdentifier><Anchor>%s</Anchor></Token>\n" % (name, name))
+ tokens.write("</File>\n")
+
+ newFilePath = os.path.join(dest_folder, href)
+ if not os.path.exists(os.path.dirname(newFilePath)):
+ os.makedirs(os.path.dirname(newFilePath)) # might be a bug...if given something/test.html, it creates test.html as a directory!
+ with codecs.open(newFilePath, "w", encoding="utf-8") as newFile:
+ newFile.write(unicode(soup))
 tokens.write("</Tokens>")
@@ -274,3 +276,6 @@ def find_existing_file(possible):
os.remove(os.path.join(docset_folder, "Contents/Resources/Tokens.xml"))
print("done")
+print("")
+print("You might have to manually add missing references (images, ...) as they are not automatically detected.")
+print("It is also a good practice to remove additional elements, such as headers, sidebars, and so on.")
diff --git a/PythonDocs2DocSet/create_docset_json.py b/PythonDocs2DocSet/create_docset_json.py
new file mode 100755
index 0000000..16cfce2
--- /dev/null
+++ b/PythonDocs2DocSet/create_docset_json.py
@@ -0,0 +1,330 @@
+#!/usr/bin/env python
+# encoding: utf-8
+
+import re
+import os
+import shutil
+import subprocess
+import os.path
+import codecs
+import json
+from xml.sax.saxutils import escape
+from bs4 import BeautifulSoup
+
+## Tries to find docsetutil
+possible_docsetutil_path = [
+ "/Developer/usr/bin/docsetutil",
+ "/Applications/Xcode.app/Contents/Developer/usr/bin/docsetutil",
+]
+docsetutil_path = [path
+ for path in possible_docsetutil_path
+ if os.path.exists(path)]
+if len(docsetutil_path) == 0:
+ print ("Could not find docsetutil. Please check for docsetutil's "
+ "location and set it inside the script.")
+ exit(1)
+
+docsetutil_path = docsetutil_path[0]
+
+## Script should run in the folder where the docs live
+source_folder = os.getcwd()
+
+## Find the Python version of the docs
+python_version = None
+with codecs.open(os.path.join(source_folder, "index.html"), 'r', encoding="utf-8") as f:
+ for line in f:
+ search = re.search("dash; (.*?) documentation", line)
+ if search:
+ python_version = search.group(1)
+ break
+ search = re.search(".*?dash; (.*? v[^ <]+) ", line)
+ if search:
+ python_version = search.group(1)
+ break
+
+if python_version is None:
+ print ("I could not find Python's version in the index.html "
+ "file. Are you in the right folder??")
+ exit(1)
+
+docset_name = python_version.strip().lower().replace(" ", "_")
+dest_folder = os.path.join(source_folder, ("%s.docset/" % docset_name))
+
+def find_existing_file(possible):
+ path = [path for path in possible if os.path.exists(os.path.join(source_folder, path))]
+ if len(path) == 0:
+ print ("Could not find %s. Please check your doc folder structure and "
+ "try again." % " or ".join(possible))
+ raise Exception()
+ return path[0]
+
+## Clean up first
+if os.path.exists(dest_folder):
+ shutil.rmtree(dest_folder)
+
+## Create all the necessary folder hierarchy
+os.makedirs(dest_folder + "Contents/Resources/Documents/")
+docset_folder = dest_folder
+dest_folder = os.path.join(dest_folder, "Contents")
+
+searchindex_path = find_existing_file ([
+ "searchindex.json"
+])
+
+genindex_path = find_existing_file([
+ "genindex-all.html",
+ "genindex.html",
+])
+
+## Create Info.plist
+with codecs.open(os.path.join(dest_folder, "Info.plist"), "w", encoding="utf-8") as info:
+ info.write("""<?xml version="1.0" encoding="UTF-8"?>
+<!DOCTYPE plist PUBLIC "-//Apple//DTD PLIST 1.0//EN" "http://www.apple.com/DTDs/PropertyList-1.0.dtd">
+<plist version="1.0">
+<dict>
+    <key>CFBundleIdentifier</key>
+    <string>python.%s</string>
+    <key>CFBundleName</key>
+    <string>%s</string>
+    <key>DocSetPlatformFamily</key>
+    <string>python</string>
+</dict>
+</plist>
+""" % (python_version.strip().lower().replace(" ", "."), python_version.strip()))
+
+## Create Nodes.xml
+dest_folder = os.path.join(dest_folder, "Resources")
+nodes = codecs.open(os.path.join(dest_folder, "Nodes.xml"), "w", encoding="utf-8")
+nodes.write("""<?xml version="1.0" encoding="UTF-8"?>
+<DocSetNodes version="1.0">
+    <TOC>
+        <Node>
+            <Name>Modules Index</Name>
+            <Path>%s</Path>
+        </Node>
+    </TOC>
+</DocSetNodes>
+""" % genindex_path)
+
+## Create the tokens file
+token_path = os.path.join(dest_folder, "Tokens.xml")
+dest_folder = os.path.join(dest_folder, "Documents")
+
+## Collect all files in folder
+for path,_,files in os.walk ("."):
+ # Clean up path (remove "./")
+ cleanPath = re.sub (r"^\./", "", path)
+ if cleanPath == ".":
+ cleanPath = ""
+ if os.path.samefile (os.path.commonprefix([os.path.abspath(path), docset_folder]), docset_folder):
+ continue
+ if cleanPath != "":
+ os.makedirs (os.path.join (dest_folder, cleanPath))
+
+ # Walk through files
+ for f in files:
+ href = os.path.join (cleanPath, f)
+ print "copying {0} -> {1}".format (href, os.path.join (dest_folder, cleanPath))
+ shutil.copy (href, os.path.join (dest_folder, cleanPath))
+
+## I'll hide the header because it makes no sense in a docset
+## and messes up Dash
+with codecs.open(os.path.join(dest_folder, "_static/basic.css"), "a+", encoding="utf-8") as css:
+ css.write("div.related {display:none;}\n")
+ css.write("div.sphinxsidebar {display:none;}\n")
+
+with codecs.open(os.path.join(dest_folder, "_static/default.css"), "a+", encoding="utf-8") as css:
+ css.write("a.headerlink {display:none;}\n")
+ css.write("div.bodywrapper {margin: 0 0 0 0px;}")
+
+searchindex = None
+with codecs.open(searchindex_path, "r", encoding="utf-8") as sifd:
+ searchindex = json.load (sifd)
+
+funtypenames = ["Module", "Class", "Method", "Class method", "Function", "Exception", "Attribute"]
+funtypedef = ["cat", "cl", "clm", "clm", "func", "cl", "instp"]
+funconv = {}
+
+print ("Available object types :")
+for ft,z in zip (funtypenames, range (len (funtypenames))):
+ print (" {0}: {1}".format (ft, z+1))
+
+print ("Detected object types :")
+for dty in searchindex['objtypes']:
+ stype = searchindex['objtypes'][dty].split (":")[-1]
+
+ # Do some guesswork for common types
+ if stype == 'class':
+ funconv[dty] = "cl"
+ elif stype == 'module':
+ funconv[dty] = "cat"
+ elif stype == 'data':
+ funconv[dty] = "instp"
+ elif stype == 'function':
+ funconv[dty] = "func"
+ elif stype == 'method':
+ funconv[dty] = "clm"
+ elif stype == 'exception':
+ funconv[dty] = "cl"
+ elif stype == 'attribute':
+ funconv[dty] = 'instp'
+ elif stype == 'staticmethod':
+ funconv[dty] = 'clm'
+ elif stype == 'member':
+ funconv[dty] = 'instp'
+ elif stype == 'type':
+ funconv[dty] = 'cl'
+ elif stype == 'var':
+ funconv[dty] = 'instp'
+ elif stype == 'macro':
+ funconv[dty] = 'func'
+
+ # Else, ask the user
+ else:
+ i = raw_input ("Enter type (1-{0}) for '{1}': ".format (len(funtypenames), searchindex['objtypes'][dty]))
+ funconv[dty] = funtypedef[int(i)-1]
+
+with codecs.open(token_path, "w", encoding="utf-8" ) as tokens:
+ ## Start of the tokens file
+ tokens.write("""<?xml version="1.0" encoding="UTF-8"?>
+<Tokens version="1.0">
+""")
+
+ count = 0
+ for filename in searchindex['filenames']:
+ print ("adding file {0}".format (filename))
+ tokens.write("<File path=\"%s.html\">\n" % filename)
+
+ # Open file
+ with codecs.open (filename+".html", "r", encoding="utf-8") as tmp:
+ # Read HTML structure
+ soup = BeautifulSoup (tmp)
+
+ ## This adds some hidden tags that makes Dash display this page's
+ ## TOC on the left side of the screen, just like with iOS and OSX docs
+ tocdiv = soup.find('div', 'sphinxsidebarwrapper')
+ if tocdiv is None:
+ tocdiv = soup.find('div', 'sphinxsidebar')
+
+ if tocdiv is not None:
+ toc = tocdiv.findAll("a", "reference")
+ if len(toc) > 0:
+ toc_tag = soup.new_tag("div", style="display:none;")
+ soup.body.append(toc_tag)
+ a_tag = soup.new_tag("a")
+ a_tag["name"] = "#"
+ toc_tag.append(a_tag)
+ h3_tag = soup.new_tag("h3")
+ h3_tag["class"] = "tasks"
+ h3_tag.append("TOC")
+ toc_tag.append(h3_tag)
+ ul_tag = soup.new_tag("ul")
+ ul_tag["class"] = "tooltip"
+ toc_tag.append(ul_tag)
+
+ for t in toc:
+ li_tag = soup.new_tag("li")
+ li_tag["class"] = "tooltip"
+ ul_tag.append(li_tag)
+ a_tag = soup.new_tag("a")
+ a_tag["href"] = t['href']
+ a_tag.append(t.text)
+ li_tag.append(a_tag)
+
+ # ...and write that inside the HTML file
+ with codecs.open(os.path.join (dest_folder, filename+".html"), "w", encoding="utf-8") as newFile:
+ newFile.write(unicode(soup))
+
+ # Look inside each module for elements that belong to this file
+ for mod in searchindex['objects']:
+ for f in searchindex['objects'][mod]:
+ # Location of the keyword
+ dat = searchindex['objects'][mod][f]
+
+ # Type of the keyword
+ fty = funconv[str(dat[1])]
+
+ # Use point-separated names for Python docs
+ if mod != '':
+ f = mod + "." + f
+
+ # If this keyword belongs to this file, then go on
+ if dat[0] == count:
+ # Try to find anchor with the same name as the keyword
+ anchor = None
+
+ # Try for exact match
+ tabN = soup.findAll (lambda x:('id' in x.attrs and f == x.attrs['id']))
+
+ # If not, try for lower-case match
+ if len(tabN) == 0:
+ tabL = soup.findAll (lambda x:('id' in x.attrs and f.lower() == x.attrs['id']))
+ if len(tabL) > 0:
+ anchor = f.lower()
+
+ # If not, try to guess alternative matches
+ else:
+ tabA = soup.findAll (lambda x:('dt' == x.name and 'id' in x.attrs and (f in x.attrs['id'].lower() or f.lower() in x.attrs['id'].lower())))
+ if len(tabA) == 0:
+ print "Warning: Did not find {0} in {1}".format (f, filename+".html")
+ else:
+ for _fta in tabA:
+ _fa = _fta.attrs['id']
+ g1 = re.match (r"^(?P<prefix>([^\s]+\s+)*?)([^\s\(\)]+::)*(?P<fname>[^\s\(\):\<\>]+)(\<[^\<\>]*\>)?\(.*\)[^\(\)]*$", _fa)
+ g2 = re.match (r"^(?P<prefix>([^\s]+\s+)*?)([^\s\(\)]+::)*(?P<fname>[^\s\(\):\<\>]+)$", _fa)
+ g3 = re.match (r"^(?P<prefix>([^\.]+\.)*?)(?P<fname>[^\s\(\):\.\<\>]+)$", _fa)
+ if (g1 is not None and g1.group('fname') == f) or (g2 is not None and g2.group('fname') == f) or (g3 is not None and g3.group('fname') == f):
+ anchor = _fa
+ print "Warning: Did not find exact match for {0} in {1}, defaulting to {2}".format (f, filename+".html", anchor)
+ break
+ print "Warning: Did not find any match for {0} in {1}".format (f, filename+".html")
+ else:
+ anchor = f
+
+ # See if this is a full C(++) function description, in which case extract the name
+ name = f
+ if ' ' in f or '(' in f or ')' in f or ':' in f:
+ g = re.match (r"^(?P<prefix>([^\s]+\s+)*?)([^\s\(\)]+::)*(?P<fname>[^\s\(\):\<\>]+)(\<[^\<\>]*\>)?\(.*\)[^\(\)]*$", f)
+ if g is None:
+ # Match variable name
+ g = re.match (r"^(?P<prefix>([^\s]+\s+)*?)([^\s\(\)]+::)*(?P<fname>[^\s\(\):\<\>]+)$", f)
+ if g is None:
+ continue
+ else:
+ name = g.group('fname')
+ else:
+ name = g.group('fname')
+
+
+ # Write identifier
+ name = escape (name)
+ tokens.write ("\t<Token><TokenIdentifier>//apple_ref/cpp/%s/%s</TokenIdentifier>" % (fty, name))
+
+ # Write anchor if needed
+ if anchor is not None:
+ anchor = escape (anchor)
+ tokens.write ("<Anchor>%s</Anchor>" % anchor)
+
+ # ...and finalize
+ tokens.write ("</Token>\n")
+
+ count = count + 1
+ tokens.write("</File>\n")
+ tokens.write("</Tokens>")
+
+try:
+
+ print("calling docsetutil")
+ subprocess.call([docsetutil_path, "index", docset_folder])
+
+except OSError as e:
+
+ print("something went wrong trying to call docsetutil: ", e)
+
+## Cleanup
+os.remove(os.path.join(docset_folder, "Contents/Resources/Nodes.xml"))
+os.remove(os.path.join(docset_folder, "Contents/Resources/Tokens.xml"))
+
+print ("done")
diff --git a/PythonDocs2DocSet/create_docset_json.sh b/PythonDocs2DocSet/create_docset_json.sh
new file mode 100755
index 0000000..bd1be67
--- /dev/null
+++ b/PythonDocs2DocSet/create_docset_json.sh
@@ -0,0 +1,20 @@
+#!/bin/bash
+
+# Base Script File (convert.sh)
+# Created: Wed Mar 14 01:20:32 2012
+# Version: 1.0
+# Author: François-Xavier Thomas
+#
+# This Bash script was developed by François-Xavier Thomas.
+# You are free to copy, adapt or modify it.
+# If you do so, however, leave my name somewhere in the credits, I'd appreciate it ;)
+
+JS_DICT=$(cat searchindex.js | sed 's/Search\.setIndex(\(.*\))/\1/g')
+
+echo "o = $JS_DICT;" > _tmp.js
+echo "process.stdout.write (JSON.stringify (o));" >> _tmp.js
+
+node _tmp.js > searchindex.json
+rm _tmp.js
+
+python `dirname $0`/create_docset_json.py
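create_docset_json.py assumes that searchindex.json (produced by the sed/node conversion above from Sphinx's searchindex.js) exposes filenames, objtypes and objects keys, with each objects entry pointing back into the other two by index. A small sketch for inspecting that structure under the same assumption (the example values are illustrative, not guaranteed by Sphinx):

    # inspect_searchindex.py -- illustrative only, not part of this patch
    import json

    with open("searchindex.json") as fd:
        idx = json.load(fd)

    print(idx["filenames"][:5])   # page names, without the ".html" suffix
    print(idx["objtypes"])        # e.g. {"0": "py:module", "1": "py:method", ...}

    for mod, entries in idx["objects"].items():
        for name, dat in entries.items():
            # dat[0] indexes idx["filenames"], dat[1] indexes idx["objtypes"]
            print("{0}.{1} -> {2}.html (type {3})".format(
                mod, name, idx["filenames"][dat[0]], idx["objtypes"][str(dat[1])]))
            break
        break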