From 127a90e6f0d93606ba3a6c24656127c5b047f514 Mon Sep 17 00:00:00 2001
From: Francisco Mesa <franciscomesa@gmail.com>
Date: Mon, 24 Feb 2020 14:09:31 +0000
Subject: [PATCH] Update testing01.py

---
 testing01.py | 73 +++++++++++++++++++++++++++++++++++++++++++++-------
 1 file changed, 64 insertions(+), 9 deletions(-)

diff --git a/testing01.py b/testing01.py
index 5fb6e8e..9bc1b22 100644
--- a/testing01.py
+++ b/testing01.py
@@ -3,15 +3,70 @@
 import urllib
 import json
 import wikipediaapi
+from bs4 import BeautifulSoup
+import pprint
 
 # https://es.wikipedia.org/w/api.php?action=query&list=categorymembers&cmpageid=132668&format=json&cmlimit=500
 # https://www.mediawiki.org/wiki/API:Categorymembers
 
-def print_categorymembers(categorymembers, level=0, max_level=10):
+
+def getVcard(pagina):
+    """Fetch URL `pagina`; return its infobox rows as a {th text: td text} dict.
+
+    Several kinds of vcard table exist; only class 'infobox' is parsed. Rows
+    lacking a <th> or <td> are skipped; with no infobox the dict is empty."""
+    import urllib.request  # bare `import urllib` does not load the submodule
+    print("\t\t", pagina)
+    with urllib.request.urlopen(pagina) as page:  # always close the response
+        soup = BeautifulSoup(page.read(), "html.parser")
+    table = soup.find('table', class_='infobox')
+    result, exceptional_row_count = {}, 0
+    for tr in (table.find_all('tr') if table is not None else []):
+        th, td = tr.find('th'), tr.find('td')
+        if th is not None and td is not None:
+            result[th.text] = td.text
+        else:
+            exceptional_row_count += 1  # the first-row logos fall here
+    if exceptional_row_count > 1:
+        print('')  # placeholder for a "more than one odd row" warning
+    print(".......RESULTADO:", pprint.pformat(result))
+    return result
+
+
+# Return the keys of a dictionary as a list.
+def getList(dict):
+    """Return the keys of `dict` as a new list, in the mapping's own order.
+
+    The parameter keeps its original name (it shadows the builtin inside this
+    function only) so that any keyword callers keep working."""
+    return list(dict.keys())
+
+
+# Adapted from the Wikipedia-API documentation.
+def print_sections(sections, level=0):
+    """Recursively print each section's title and first 40 characters of text."""
+    stars = "*" * (level + 1)  # marker length grows with the nesting level
+    for section in sections:
+        print("\t\t%s: %s - %s" % (stars, section.title, section.text[:40]))
+        print_sections(section.sections, level + 1)
+
+
+def print_categorymembers(categorymembers, level=0, max_level=2):
         for c in categorymembers.values():
-            print("%s: %s (ns: %d)" % ("*" * (level + 1), c.title, c.ns))
+            # Recurse into sub-categories while level < max_level; for members in
+            # the main namespace print a summary and pass their URL to getVcard.
             if c.ns == wikipediaapi.Namespace.CATEGORY and level < max_level:
                 print_categorymembers(c.categorymembers, level=level + 1, max_level=max_level)
+            elif c.ns == wikipediaapi.Namespace.MAIN  :
+                print("%s: %s (ns: %d) -> l:%d" % ("*" * (level + 1), c.title, c.ns, len(c.backlinks)  )) 
+                print("\t", len(c.sections) ) # length of c.sections (see print_sections)
+                wiki = wikipediaapi.Wikipedia(LANG)
+                page = wiki.page(c.title)
+                print("\t", page.fullurl  )
+                getVcard(page.fullurl)
+                print("\t%d > %s" % (len(c.langlinks),str(getList(c.langlinks))) )
+                # NOTE(review): a Wikipedia client is rebuilt for every page here;
+                # c.fullurl could presumably be used directly - confirm.
 
 
 
@@ -49,11 +104,11 @@ def listaCategoriasporID(pageid):
 LANG = "es"
 CATEGORIAORIGEN = "Category:Canarios"
 IDCATEGORIAORIGEN = 132668
+AGENT = {"User-Agent": "Mozilla/5.0"}  # header dict; mismatched quotes had made this a one-string set
 
 ###CATEGORIAORIGEN   = "Category:Naturales_de_la_provincia_de_Las_Palmas"
 ###IDCATEGORIAORIGEN = 4343370
 
-
 wikipedia.set_lang(LANG)  
 
 #print(wikipedia.search("Bill"))
@@ -64,7 +119,7 @@ def listaCategoriasporID(pageid):
 print(canarios.links)
 
 # Library wikipedia-api
-wiki_wiki = wikipediaapi.Wikipedia('es')
+wiki_wiki = wikipediaapi.Wikipedia(LANG)
 
 
 
@@ -80,10 +135,10 @@ def listaCategoriasporID(pageid):
 print("https://es.wikipedia.org/w/api.php?action=query&list=categorymembers&cmtitle=" + CATEGORIAORIGEN + "&format=json&cmlimit=5000&cmtype=subcat")
 
 
-print ("Obtenemos subcategorías")
-subcategorias = listaCategoriasporID(IDCATEGORIAORIGEN)
-print(subcategorias)
-print ("Obtenemos páginas")
+#print ("Obtenemos subcategorías")
+#subcategorias = listaCategoriasporID(IDCATEGORIAORIGEN)
+#print(subcategorias)
+#print ("Obtenemos páginas")
 paginas = listaPaginasporID(IDCATEGORIAORIGEN)
 print("paginas")
 print(paginas)
@@ -97,4 +152,4 @@ def listaCategoriasporID(pageid):
     pagina = wikipedia.page(None,p)
     print(pagina)
     page_py = wiki_wiki.page(p)
-    print(page_py)
\ No newline at end of file
+    print(page_py)