remove dead code
lucasg committed Aug 16, 2017
1 parent cad579c commit 81febaf
Showing 1 changed file with 0 additions and 227 deletions.
posh-to-dash.py: 0 additions and 227 deletions
@@ -126,233 +126,6 @@ def download_textfile(url : str , output_filename : str, params : dict = None):
r = requests.get(url, data = params)
with open(output_filename, 'w', encoding="utf8") as f:
f.write(r.text)


def download_as_browser(url, output_filename):
global global_driver
#driver = webdriver.PhantomJS(executable_path="C:\\Users\\lucas\\AppData\\Roaming\\npm\\node_modules\\phantomjs-prebuilt\\lib\\phantom\\bin\phantomjs.exe")
global_driver.get(url)

#soupFromJokesCC = BeautifulSoup(driver.page_source) #page_source fetches page after rendering is complete
with open(output_filename, 'w', encoding="utf8") as f:
f.write(global_driver.page_source)

#global_driver.save_screenshot(output_filename+'screen.png') # save a screenshot to disk



def download_and_fix_links(url, output_filepath, posh_version = Configuration.posh_version, is_index = False, documents_folder = None):
""" Download and fix broken nav paths for modules index """
global global_driver
# r = requests.get(url)
# index_html = r.text
#driver = webdriver.PhantomJS(executable_path="C:\\Users\\lucas\\AppData\\Roaming\\npm\\node_modules\\phantomjs-prebuilt\\lib\\phantom\\bin\phantomjs.exe")
try:
global_driver.get(url)
index_html = global_driver.page_source
except (ConnectionResetError, urllib.error.URLError) as e:
# we may have triggered an anti-scraping time ban
# Lay low for several seconds and get back to it.

global_driver.quit()
global_driver = webdriver.PhantomJS(executable_path="C:\\Users\\lucas\\AppData\\Roaming\\npm\\node_modules\\phantomjs-prebuilt\\lib\\phantom\\bin\phantomjs.exe")
time.sleep(2)
index_html = None

# try a second time, and let the error propagate if it fails again
if not index_html:
global_driver.get(url)
index_html = global_driver.page_source


soup = bs(index_html, 'html.parser')


links = soup.findAll("a", { "data-linktype" : "relative-path"}) # for modules and cmdlet pages
if is_index: # for index page
content_table = soup.findAll("table", { "class" : "api-search-results standalone"})[0]
links = content_table.findAll(lambda tag: tag.name == 'a' and 'ms.title' in tag.attrs) # for index page

for link in links:
# search replace <a href="(\w+-\w+)\?view=powershell-6" data-linktype="relative-path">
# <a href="$1.html" data-linktype="relative-path">
if is_index:
link_str_pattern = "([\w\.\/]+)\?view=powershell-"
else:
link_str_pattern = "(\w+-\w+)\?view=powershell-"

link_pattern = re.compile(link_str_pattern)
targets = link_pattern.findall(link['href'])
if not len(targets): # badly formatted 'a' link
continue

if is_index:
uri_path = targets[0].lstrip('/').rstrip('/')
fixed_link = soup.new_tag("a", href="%s/index.html" % (uri_path), **{ "ms.title" : link["ms.title"]})
else:
fixed_link = soup.new_tag("a", href="%s.html" % targets[0], **{ "data-linktype" : "relative-path"})

print(link['href'], " -> ", fixed_link['href'])
fixed_link.string = link.string
link.replaceWith(fixed_link)

# remove unsupported nav elements
nav_elements = [
["nav", { "class" : "doc-outline", "role" : "navigation"}],
["ul", { "class" : "breadcrumbs", "role" : "navigation"}],
["div", { "class" : "sidebar", "role" : "navigation"}],
["div", { "class" : "dropdown dropdown-full mobilenavi"}],
["p", { "class" : "api-browser-description"}],
["div", { "class" : "api-browser-search-field-container"}],
["div", { "class" : "pageActions"}],
["div", { "class" : "dropdown-container"}],
]

for nav in nav_elements:
nav_class, nav_attr = nav

for nav_tag in soup.findAll(nav_class, nav_attr):
_ = nav_tag.extract()

# Fix themes uri paths
soup = crawl_posh_themes(documents_folder, soup, output_filepath)

# Export fixed html
with open(output_filepath, 'wb') as o_index:

fixed_html = soup.prettify("utf-8")
o_index.write(fixed_html)

#global_driver.save_screenshot(output_filepath+'screen.png') # save a screenshot to disk
return index_html


def crawl_posh_themes(documents_folder, soup, current_filepath):

theme_output_dir = os.path.join(documents_folder, domain)

# downloading stylesheets
for link in soup.findAll("link", { "rel" : "stylesheet"}):
uri_path = link['href'].strip()

if uri_path.lstrip('/').startswith(default_theme_uri):

css_url = "https://%s/%s" % (domain, uri_path)
css_filepath = os.path.join(theme_output_dir, uri_path.lstrip('/'))

os.makedirs(os.path.dirname(css_filepath), exist_ok = True)

# do not download the same file twice
if not os.path.exists(css_filepath):
download_textfile(css_url, css_filepath)

# fix source map css
# $hex_encoded_id.$name.css -> $name.css
css_filename = os.path.basename(uri_path)
css_dirname = os.path.dirname(css_filepath)

r = re.compile("\w+\.([\w\.]+)")
sourcemap_css_filename = r.match(css_filename).groups()[0]
download_textfile(css_url, os.path.join(css_dirname, sourcemap_css_filename))

# Converting to a relative link
path = os.path.relpath(css_filepath, os.path.dirname(current_filepath))
rel_uri = '/'.join(path.split(os.sep))
link['href'] = rel_uri

# downloading scripts
for script in soup.findAll("script", {"src":True}):
uri_path = script['src']

if uri_path.lstrip('/').startswith(default_theme_uri):

script_url = "https://%s/%s" % (domain, uri_path)

# path normalization: strip the '?v=' cache-busting query string (could be improved)
script_path = uri_path.lstrip('/')
if -1 != script_path.find('?v='):
script_path = script_path[0:script_path.find('?v=')]

script_filepath = os.path.join(theme_output_dir, script_path)
os.makedirs(os.path.dirname(script_filepath), exist_ok = True)

# do not download the same file twice
if not os.path.exists(script_filepath):
download_textfile(script_url, script_filepath)

# Converting to a relative link
path = os.path.relpath(script_filepath, current_filepath)
rel_uri = '/'.join(path.split(os.sep))
script['src'] = rel_uri

return soup


def crawl_posh_documentation(documents_folder, powershell_version = Configuration.posh_version):
""" Crawl and download Posh modules documentation """

index = default_url % powershell_version
modules_toc = default_toc % (powershell_version, powershell_version)

index_filepath = os.path.join(documents_folder, domain, "en-us", "index.html")
download_and_fix_links(index, index_filepath, is_index= True, posh_version = powershell_version, documents_folder = documents_folder)

modules_filepath = os.path.join(documents_folder, "modules.toc")
download_textfile(modules_toc, modules_filepath)

theme_output_dir = os.path.join(documents_folder, domain, default_theme_uri)
os.makedirs(theme_output_dir, exist_ok = True)

with open(modules_filepath, 'r') as modules_fd:
modules = json.load(modules_fd)

for module in modules['items'][0]['children']:

module_url = urllib.parse.urljoin(modules_toc, module["href"])
module_url = "%s/?view=powershell-%s" % (module_url, powershell_version)

module_dir = os.path.join(documents_folder, base_url, module['toc_title'])
module_filepath = os.path.join(module_dir, "index.html")
os.makedirs(module_dir, exist_ok = True)

logging.debug("downloading modules doc %s -> %s" %(module_url, module_filepath))
mod_html = download_and_fix_links(module_url, module_filepath, posh_version = powershell_version, documents_folder = documents_folder)


for cmdlet in module['children']:
cmdlet_name = cmdlet['toc_title']

if cmdlet_name.lower() in ("about", "functions", "providers", "provider"): # skip special toc
continue

logging.debug("cmdlet %s" % cmdlet)

cmdlet_urlpath = cmdlet["href"]
cmdlet_url = urllib.parse.urljoin(modules_toc, cmdlet_urlpath)
cmdlet_url = "%s?view=powershell-%s" % (cmdlet_url, powershell_version)

cmdlet_filepath = os.path.join(module_dir, "%s.html" % cmdlet_name)

logging.debug("downloading cmdlet doc %s -> %s" %(cmdlet_url, cmdlet_filepath))
cmdlet_html = download_and_fix_links(cmdlet_url, cmdlet_filepath, posh_version = powershell_version, documents_folder = documents_folder)


def insert_into_sqlite_db(cursor, name, record_type, path):
""" Insert a new unique record in the sqlite database. """
try:
cursor.execute('SELECT rowid FROM searchIndex WHERE path = ?', (path,))
dbpath = cursor.fetchone()
cursor.execute('SELECT rowid FROM searchIndex WHERE name = ?', (name,))
dbname = cursor.fetchone()

if dbpath is None and dbname is None:
cursor.execute('INSERT OR IGNORE INTO searchIndex(name, type, path) VALUES (?,?,?)', (name, record_type, path))
logging.debug('DB add [%s] >> name: %s, path: %s' % (record_type, name, path))
else:
logging.debug('record exists')

except:
pass


def make_docset(source_dir, dst_dir, filename):
