修訂 | 13b586d87591ed5688e94d80c44166ef20d049a0 (tree) |
---|---|
時間 | 2019-12-15 00:46:56 |
作者 | Jaime Marquínez Ferrándiz <jaime.marquinez.ferrandiz@fast...> |
Committer | Jaime Marquínez Ferrándiz |
Commit inicial
@@ -0,0 +1,6 @@ | ||
1 | +venv | |
2 | +build | |
3 | +# Default ignored files | |
4 | +.idea/workspace.xml | |
5 | + | |
6 | +cee.xml |
@@ -0,0 +1,6 @@ | ||
1 | +<component name="InspectionProjectProfileManager"> | |
2 | + <settings> | |
3 | + <option name="USE_PROJECT_PROFILE" value="false" /> | |
4 | + <version value="1.0" /> | |
5 | + </settings> | |
6 | +</component> | |
\ No newline at end of file |
@@ -0,0 +1,4 @@ | ||
1 | +<?xml version="1.0" encoding="UTF-8"?> | |
2 | +<project version="4"> | |
3 | + <component name="ProjectRootManager" version="2" project-jdk-name="Python 3.7 (sword-biblia-cee)" project-jdk-type="Python SDK" /> | |
4 | +</project> | |
\ No newline at end of file |
@@ -0,0 +1,8 @@ | ||
1 | +<?xml version="1.0" encoding="UTF-8"?> | |
2 | +<project version="4"> | |
3 | + <component name="ProjectModuleManager"> | |
4 | + <modules> | |
5 | + <module fileurl="file://$PROJECT_DIR$/.idea/sword-biblia-cee.iml" filepath="$PROJECT_DIR$/.idea/sword-biblia-cee.iml" /> | |
6 | + </modules> | |
7 | + </component> | |
8 | +</project> | |
\ No newline at end of file |
@@ -0,0 +1,13 @@ | ||
1 | +<?xml version="1.0" encoding="UTF-8"?> | |
2 | +<module type="PYTHON_MODULE" version="4"> | |
3 | + <component name="NewModuleRootManager"> | |
4 | + <content url="file://$MODULE_DIR$"> | |
5 | + <excludeFolder url="file://$MODULE_DIR$/venv" /> | |
6 | + </content> | |
7 | + <orderEntry type="jdk" jdkName="Python 3.7 (sword-biblia-cee)" jdkType="Python SDK" /> | |
8 | + <orderEntry type="sourceFolder" forTests="false" /> | |
9 | + </component> | |
10 | + <component name="TestRunnerService"> | |
11 | + <option name="PROJECT_TEST_RUNNER" value="Unittests" /> | |
12 | + </component> | |
13 | +</module> | |
\ No newline at end of file |
@@ -0,0 +1,6 @@ | ||
1 | +<?xml version="1.0" encoding="UTF-8"?> | |
2 | +<project version="4"> | |
3 | + <component name="VcsDirectoryMappings"> | |
4 | + <mapping directory="$PROJECT_DIR$" vcs="hg4idea" /> | |
5 | + </component> | |
6 | +</project> | |
\ No newline at end of file |
@@ -0,0 +1,22 @@ | ||
1 | +NOMBRE_MODULO = cee | |
2 | + | |
3 | +OUT_DIR = build | |
4 | + | |
5 | +OSIS_FILE = $(NOMBRE_MODULO).xml | |
6 | + | |
7 | +MOD_FILES = nt nt.vss ot ot.vss | |
8 | +MOD_FILES != for f in $(MOD_FILES); do echo $(OUT_DIR)/$$f; done | |
9 | +ZMOD_FILES = nt.bzs nt.bzv nt.bzz ot.bzs ot.bzv ot.bzz | |
10 | +ZMOD_FILES != for f in $(ZMOD_FILES); do echo $(OUT_DIR)/$$f; done | |
11 | + | |
12 | +all: $(MOD_FILES) | |
13 | + | |
14 | +$(MOD_FILES): $(OSIS_FILE) | |
15 | + mkdir -p $(OUT_DIR) | |
16 | + osis2mod $(OUT_DIR) $^ | |
17 | + | |
18 | +$(ZMOD_FILES): $(MOD_FILES) | |
19 | + mod2zmod $(NOMBRE_MODULO) . 4 2 | |
20 | + | |
21 | +clean: | |
22 | + rm -f $(MOD_FILES) $(ZMOD_FILES) |
@@ -0,0 +1,9 @@ | ||
1 | +[CEE] | |
2 | +ModDrv=RawText | |
3 | +DataPath=./modules/texts/ztext/cee/ | |
4 | +CompressType=ZIP | |
5 | +BlockType=BOOK | |
6 | +Encoding=UTF-8 | |
7 | +SourceType=OSIS | |
8 | +Lang=es | |
9 | +Description=Biblia de la Conferencia Episcopal Española | |
\ No newline at end of file |
@@ -0,0 +1,56 @@ | ||
1 | +import requests | |
2 | +from bs4 import BeautifulSoup | |
3 | +from lxml import etree as et | |
4 | + | |
5 | + | |
# Namespace URI used for the elements of an OSIS document.
NS_DEFAULT = 'http://www.bibletechnologies.net/2003/OSIS/namespace'
# Prefix map for namespace-aware lookups: the default namespace is registered
# under both the '' and the None key, each bound to NS_DEFAULT.
NAMESPACE_MAP = dict.fromkeys(('', None), NS_DEFAULT)
11 | + | |
12 | + | |
# Spanish book names mapped to their OSIS book identifiers.
_OSIS_IDS = {
    'Génesis': 'Gen',
    'Éxodo': 'Exod',
    'Levítico': 'Lev',
    'Números': 'Num',
    'Deuteronomio': 'Deut',
}


def osis_id_para_libro(nombre_libro: str) -> str:
    """Return the OSIS book identifier for a Spanish book name.

    Args:
        nombre_libro: Name of the book in Spanish, e.g. ``'Génesis'``.

    Returns:
        The OSIS identifier of the book, e.g. ``'Gen'``.

    Raises:
        ValueError: If the book name has no entry in the lookup table.
    """
    try:
        return _OSIS_IDS[nombre_libro]
    except KeyError:
        # Fail here with a clear message instead of handing None to the
        # caller, which would only break later when building the osisID.
        raise ValueError(f'Libro desconocido: "{nombre_libro}"') from None
16 | + | |
17 | + | |
def construir_libro(url: str, nombre_libro: str, session: requests.Session):
    """Download one book and convert it into an OSIS ``<div type="book">``.

    The page at *url* is fetched through *session* and parsed with
    BeautifulSoup. Every element with class ``capitulo`` becomes a
    ``<chapter>``; inside it, every element with class ``numvers`` becomes a
    ``<verse>`` whose text is taken from the next sibling with class
    ``contenido``.

    Args:
        url: Address of the HTML page to download.
        nombre_libro: Spanish name of the book; it is resolved to an OSIS
            identifier with ``osis_id_para_libro``.
        session: requests session used for the download.

    Returns:
        The lxml element holding the chapters and verses of the book.

    Raises:
        requests.HTTPError: If the server answers with an error status.
        ValueError: If a chapter has no ``numcap`` element or a verse number
            has no following ``contenido`` element.
    """
    print(f'Construyendo "{nombre_libro}"')
    osis_id = osis_id_para_libro(nombre_libro)
    # A timeout keeps a stalled server from hanging the build indefinitely.
    r = session.get(url, timeout=30)
    # Fail loudly instead of parsing an error page as if it were the book.
    r.raise_for_status()
    soup = BeautifulSoup(r.text, 'lxml')

    book = et.Element('div', type='book', osisID=osis_id)
    for chapter_tag in soup.find_all(class_='capitulo'):
        numcap_tag = chapter_tag.find(class_='numcap')
        if numcap_tag is None:
            raise ValueError(f'Capítulo sin número en "{nombre_libro}"')
        # Identifiers are stripped so stray whitespace from the markup does
        # not end up inside the osisID attribute.
        chapter_id = f'{osis_id}.{numcap_tag.text.strip()}'
        chapter = et.SubElement(book, 'chapter', osisID=chapter_id)
        for numverse_tag in chapter_tag.find_all(class_='numvers'):
            verse_id = f'{chapter_id}.{numverse_tag.text.strip()}'
            content_tag = numverse_tag.find_next_sibling(class_='contenido')
            if content_tag is None:
                raise ValueError(f'Versículo sin contenido: {verse_id}')
            verse = et.SubElement(chapter, 'verse', osisID=verse_id)
            verse.text = content_tag.text
    return book
33 | + | |
34 | + | |
def construir_sesion():
    """Create the HTTP session object.

    Returns:
        A ``requests.Session`` whose default headers carry a Firefox
        ``User-Agent`` string.
    """
    sesion = requests.Session()
    sesion.headers['User-Agent'] = (
        'Mozilla/5.0 (X11; Linux x86_64; rv:70.0) Gecko/20100101 Firefox/70.0'
    )
    return sesion
42 | + | |
43 | + | |
def construir_biblia():
    """Build ``cee.xml`` from the OSIS template and the downloaded book.

    Parses ``template_osis.xml``, downloads Genesis, appends the resulting
    book element to the template's ``osisText`` element and writes the whole
    document to ``cee.xml``.

    Raises:
        ValueError: If the template contains no ``osisText`` element.
    """
    parser = et.XMLParser(remove_blank_text=True)
    osis_doc = et.parse('template_osis.xml', parser)
    # Validate the template before downloading anything, so a broken
    # template is reported without waiting on the network.
    osis_text = osis_doc.find('osisText', NAMESPACE_MAP)
    if osis_text is None:
        raise ValueError('template_osis.xml no contiene el elemento osisText')

    sesion = construir_sesion()
    libro = construir_libro('https://www.conferenciaepiscopal.es/biblia/genesis', 'Génesis', sesion)
    osis_text.append(libro)

    # Serialize as real UTF-8 with an XML declaration; without an explicit
    # encoding lxml emits ASCII with numeric character references.
    xml_bytes = et.tostring(osis_doc, pretty_print=True, encoding='UTF-8', xml_declaration=True)
    with open('cee.xml', 'wb') as out_file:
        out_file.write(xml_bytes)


if __name__ == '__main__':
    construir_biblia()
\ No newline at end of file |
@@ -0,0 +1,3 @@ | ||
1 | +requests | |
2 | +beautifulsoup4 | |
3 | +lxml | |
\ No newline at end of file |
@@ -0,0 +1,29 @@ | ||
1 | +<?xml version='1.0' encoding='UTF-8'?> | |
2 | +<osis xsi:schemaLocation='http://www.bibletechnologies.net/2003/OSIS/namespace http://www.bibletechnologies.net/osisCore.2.1.1.xsd' xmlns='http://www.bibletechnologies.net/2003/OSIS/namespace' xmlns:xsi='http://www.w3.org/2001/XMLSchema-instance'> | |
3 | + <osisText osisIDWork='cee' xml:lang='es' osisRefWork='Bible'> | |
4 | + <header> | |
5 | + <revisionDesc> | |
6 | + <date>2019-11-16</date> | |
7 | + <p>initial OSIS 2.1.1 version</p> | |
8 | + </revisionDesc> | |
9 | + <work osisWork='cee'> | |
10 | + <title>Versión de la Conferencia Episcopal</title> | |
11 | + <contributor/> | |
12 | + <creator/> | |
13 | + <subject/> | |
14 | + <date>2016</date> | |
15 | + <description /> | |
16 | + <publisher /> | |
17 | + <type type='OSIS'>Bible</type> | |
18 | + <identifier type='OSIS'>cee</identifier> | |
19 | + <source>https://www.conferenciaepiscopal.es/Biblia/</source> | |
20 | + <language type='IETF'>es</language> | |
21 | + <relation/> | |
22 | + <coverage /> | |
23 | + <rights>Conferencia Episcopal Española. 2016</rights> | |
24 | + <scope/> | |
25 | + <refSystem>Bible</refSystem> | |
26 | + </work> | |
27 | + </header> | |
28 | + </osisText> | |
29 | +</osis> | |
\ No newline at end of file |