wip
This commit is contained in:
@@ -1,2 +1,10 @@
|
||||
# markepub
|
||||
|
||||
|
||||
normalize-md
|
||||
|
||||
Takes a Markdown file and, in place:
|
||||
|
||||
- remove double empty lines
|
||||
- add line break after every full-stop
|
||||
|
||||
|
||||
0
markepub/__init__.py
Normal file
0
markepub/__init__.py
Normal file
156
markepub/de_meta.py
Normal file
156
markepub/de_meta.py
Normal file
@@ -0,0 +1,156 @@
|
||||
from collections.abc import Mapping, Sequence
|
||||
|
||||
import pandoc
|
||||
from lxml import etree
|
||||
from lxml.builder import E
|
||||
# noinspection PyUnresolvedReferences
|
||||
from pandoc.types import Meta, MetaMap, MetaList, MetaBool, MetaString, MetaInlines, MetaBlocks, Str, Emph, Underline, \
|
||||
Strong, Strikeout, Superscript, Subscript, SmallCaps, Quoted, Cite, Code, Space, SoftBreak, LineBreak, Math
|
||||
|
||||
from util import get_xhtml_template
|
||||
|
||||
|
||||
def resolve_inline(value):
    """Translate a pandoc inline element into plain text where possible.

    ``Str`` yields its text payload, ``Space`` and ``SoftBreak`` yield a
    single space and ``LineBreak`` yields a newline.  Any other value is
    handed back untouched.
    """
    if isinstance(value, Str):
        return value[0]
    if isinstance(value, (Space, SoftBreak)):
        return ' '
    if isinstance(value, LineBreak):
        return '\n'
    # Not resolved yet (returned as-is): Emph, Underline, Strong, Strikeout,
    # Superscript, Subscript, SmallCaps, Quoted, Cite, Code, Math.
    return value
|
||||
|
||||
def resolve_meta_value(value):
    """Convert a pandoc meta value into its Python-side counterpart.

    ``MetaBool`` / ``MetaString`` are unwrapped to their payload; the
    container types are wrapped in the matching ``PyMeta*`` class.  Values
    of any other type are returned unchanged.
    """
    if isinstance(value, (MetaBool, MetaString)):
        return value[0]

    wrappers = (
        (MetaMap, PyMetaMap),
        (MetaList, PyMetaList),
        (MetaInlines, PyMetaInlines),
        (MetaBlocks, PyMetaBlocks),
    )
    for meta_type, wrapper in wrappers:
        if isinstance(value, meta_type):
            return wrapper(value)
    return value
|
||||
|
||||
class _Sequence(Sequence):
    """Read-only sequence over the payload (``p[0]``) of a wrapped value."""

    def __init__(self, p) -> None:
        # Copy the payload so the wrapper owns its own list.
        self._data = list(p[0])

    def __len__(self) -> int:
        return len(self._data)

    def __getitem__(self, index: int):
        return self._data[index]


class PyMetaBlocks(_Sequence):
    """Sequence wrapper used for ``MetaBlocks`` values."""


class PyMetaInlines(_Sequence):
    """Sequence wrapper used for ``MetaInlines`` values."""
|
||||
|
||||
|
||||
# noinspection PyMissingConstructor
class PyMetaList(_Sequence):
    """Sequence wrapper whose items are themselves resolved meta values."""

    def __init__(self, p) -> None:
        # Does not call _Sequence.__init__: each payload item is passed
        # through resolve_meta_value instead of being stored raw.
        self._data = list(map(resolve_meta_value, p[0]))
|
||||
|
||||
|
||||
class PyMetaMap(Mapping):
    """Read-only mapping over the payload of a pandoc ``Meta`` / ``MetaMap``.

    The wrapped object's first field (``pandoc_meta[0]``) must be a dict;
    every value in it is converted with ``resolve_meta_value``.
    """

    def __init__(self, pandoc_meta):
        # Read from the argument: the previous code read ``self._pandoc``,
        # an attribute that was never assigned, so construction always failed.
        self._data = {
            key: resolve_meta_value(item)
            for key, item in pandoc_meta[0].items()
        }

    def __getitem__(self, key: str, /):
        return self._data[key]

    def __len__(self) -> int:
        return len(self._data)

    def __iter__(self):
        return iter(self._data)

    def __repr__(self) -> str:
        # Mapping supplies no repr; show the resolved content for debugging.
        return f'{type(self).__name__}({self._data!r})'
|
||||
|
||||
|
||||
if __name__ == '__main__':
    # Manual smoke test: wrap a hand-written pandoc Meta value and print it.
    sample_meta = Meta({
        'creator': MetaList([
            MetaMap({
                'file-as': MetaInlines([Str('Riter,'), Space(), Str('E.Z.')]),
                'role': MetaInlines([Str('aut')]),
                'text': MetaInlines([Str('E.Z.'), Space(), Str('Riter')]),
            }),
        ]),
        'description': MetaInlines([
            Str('Karen'), Space(), Str('meets'), Space(), Str('the'), Space(),
            Str('man'), Space(), Str('she'), Space(), Str('can’t'), Space(),
            Str('resist'), Space(), Str('—'), Space(), Str('the'), Space(),
            Str('man'), Space(), Str('who'), Space(), Str('can'), Space(),
            Str('do'), Space(), Str('anything'), Space(), Str('to'), Space(),
            Str('her,'), Space(), Str('and'), Space(), Str('she'), Space(),
            Str('will'), Space(), Str('love'), Space(), Str('it'), Space(),
            Str('—'), Space(), Str('and'), Space(), Str('him.'),
        ]),
        'language': MetaInlines([Str('en')]),
        'published': MetaInlines([Str('2002-07-24')]),
        'source': MetaInlines([Str('https://www.bdsmlibrary.info/stories/story.php?storyid=1101')]),
        'subject': MetaList([
            MetaInlines([Str('M/f')]),
            MetaInlines([Str('pregnant')]),
            MetaInlines([Str('spanking')]),
            MetaInlines([Str('D/s')]),
            MetaInlines([Str('real')]),
            MetaInlines([Str('reluctant')]),
            MetaInlines([Str('Serious')]),
        ]),
        'title': MetaMap({
            'file-as': MetaInlines([Str('Karen')]),
            'text': MetaInlines([Str('Karen')]),
        }),
    })

    resolved = PyMetaMap(sample_meta)

    print(resolved)
|
||||
215
markepub/frontmatter.py
Normal file
215
markepub/frontmatter.py
Normal file
@@ -0,0 +1,215 @@
|
||||
import datetime
|
||||
import uuid
|
||||
from lxml import etree
|
||||
from lxml.builder import E
|
||||
|
||||
import yaml
|
||||
|
||||
from markepub.util import get_xhtml_template
|
||||
|
||||
# DublinCore Elements:
|
||||
# - contributor
|
||||
# - coverage
|
||||
# + creator
|
||||
# + date
|
||||
# + description
|
||||
# - format
|
||||
# + identifier
|
||||
# + language
|
||||
# + publisher
|
||||
# - relation
|
||||
# - rights
|
||||
# + source
|
||||
# + subject
|
||||
# + title
|
||||
# - type
|
||||
|
||||
# Calibre
|
||||
# + series
|
||||
# + series_index
|
||||
|
||||
|
||||
# XML namespace URIs for Dublin Core and OPF, plus the matching
# "{uri}" prefixes that lxml expects in front of a qualified tag name.
DC_NAMESPACE = "http://purl.org/dc/elements/1.1/"
OPF_NAMESPACE = "http://www.idpf.org/2007/opf"
DC = '{' + DC_NAMESPACE + '}'
OPF = '{' + OPF_NAMESPACE + '}'
|
||||
|
||||
|
||||
|
||||
|
||||
class _Scalar:
    """Base class for a single-valued metadata field.

    ``CLASS`` is the CSS class used on the title page and ``TAG`` is the
    qualified XML tag used for the metadata element.  A subclass that sets
    ``CLASS`` without setting ``TAG`` gets ``TAG = DC + CLASS``.
    """

    CLASS = ''
    TAG = DC + CLASS

    def __init_subclass__(cls, **kwargs):
        super().__init_subclass__(**kwargs)
        # ``TAG = DC + CLASS`` above is evaluated once, with the empty base
        # CLASS.  Re-derive it for subclasses that only override CLASS so
        # they do not inherit a tag with an empty local name.
        if 'CLASS' in cls.__dict__ and 'TAG' not in cls.__dict__:
            cls.TAG = DC + cls.CLASS

    def __init__(self, value):
        """Store *value*; surrounding whitespace is stripped from strings."""
        self.value = value.strip() if isinstance(value, str) else value

    def __str__(self):
        return str(self.value)

    def __repr__(self):
        # Must return a str even when the value is None or not a string.
        return f'{type(self).__name__}({self.value!r})'

    def _text(self):
        """Return the value as text, or None when no value is set."""
        return None if self.value is None else str(self.value)

    @property
    def element(self):
        """Metadata element named ``TAG`` whose text is the value."""
        e = etree.Element(self.TAG)
        # lxml only accepts str (or None) as element text.
        e.text = self._text()
        return e

    @property
    def as_title_page(self):
        """Table row with one cell holding the value, classed with ``CLASS``."""
        return E.tr(E.td(self._text() or '', **{'class': self.CLASS}))
|
||||
|
||||
|
||||
class _HasFileAs(_Scalar):
    """Scalar field that may carry an additional ``file-as`` sort key."""

    def __init__(self, value: str, file_as: str | None = None):
        """Store *value* and the optional *file_as* key (stripped if a string)."""
        super().__init__(value=value)
        # file_as defaults to None, so it cannot be stripped unconditionally.
        self.file_as = file_as.strip() if isinstance(file_as, str) else file_as

    @classmethod
    def from_yaml(cls, value: str | dict[str, str]):
        """Build an instance from a YAML scalar or mapping.

        A plain string is used as the value.  A mapping supplies the value
        under ``text`` (falling back to ``value``) and optionally ``file-as``.

        Raises:
            KeyError: if a mapping has neither ``text`` nor ``value``.
        """
        if isinstance(value, str):
            return cls(value=value)
        # Only consult 'value' when 'text' is absent; evaluating it as the
        # default of dict.get() raised KeyError for mappings with only 'text'.
        text = value['text'] if 'text' in value else value['value']
        return cls(value=text, file_as=value.get('file-as'))

    @property
    def element(self):
        """Base element plus an ``opf:file-as`` attribute when a key is set."""
        e = super().element
        if self.file_as:
            e.set(OPF + 'file-as', self.file_as)
        return e
|
||||
|
||||
|
||||
class Author(_HasFileAs):
    """Creator entry, emitted as ``dc:creator`` with ``opf:role="aut"``."""

    CLASS = 'author'
    TAG = f'{DC}creator'

    @property
    def element(self):
        creator = super().element
        creator.set(f'{OPF}role', 'aut')
        return creator
|
||||
|
||||
class Title(_HasFileAs):
    """Book title, emitted as ``dc:title``."""

    CLASS = 'title'
    # Set explicitly so the element is dc:title regardless of how the base
    # class computes its default TAG.
    TAG = DC + 'title'
|
||||
|
||||
|
||||
class _Date(_Scalar):
    """Common base for fields emitted as ``dc:date``."""

    TAG = f'{DC}date'
|
||||
|
||||
|
||||
class Published(_Date):
    """Publication date: ``dc:date`` with ``opf:event="publication"``."""

    @property
    def element(self):
        date_element = super().element
        date_element.set(f'{OPF}event', 'publication')
        return date_element
|
||||
|
||||
class Modified(_Date):
    """Modification date: ``dc:date`` with ``opf:event="modification"``.

    Holds no value of its own; the timestamp is taken from the clock (UTC)
    each time ``element`` is read.
    """

    def __init__(self):
        super().__init__(value=None)

    @property
    def element(self):
        e = super().element
        # W3CDTF / ISO 8601 date-time: 'T' separator, colons in the time
        # and a trailing 'Z' for UTC.  The previous 'YYYY-MM-DD HH-MM-SS'
        # form is not a valid date-time.
        now = datetime.datetime.now(datetime.UTC)
        e.text = now.strftime('%Y-%m-%dT%H:%M:%SZ')
        e.set(OPF + 'event', 'modification')
        return e
|
||||
|
||||
class Description(_Scalar):
    """Book description, emitted as ``dc:description``."""

    CLASS = 'description'
    # Set explicitly so the element is dc:description regardless of how the
    # base class computes its default TAG.
    TAG = DC + 'description'
|
||||
|
||||
|
||||
class Identifier(_Scalar):
    """Book identifier, emitted as ``dc:identifier`` with id ``BookId``.

    When no value is given, a random UUID URN is generated.
    """

    CLASS = 'identifier'
    # Set explicitly so the element is dc:identifier regardless of how the
    # base class computes its default TAG.
    TAG = DC + 'identifier'

    def __init__(self, value: str | None = None):
        if value is None:
            value = uuid.uuid4().urn
        super().__init__(value=value)

    @property
    def element(self):
        e = super().element
        e.set('id', 'BookId')
        # NOTE(review): the scheme is always "UUID", even for a
        # caller-supplied value - confirm callers only pass UUIDs.
        e.set(OPF + 'scheme', 'UUID')
        return e
|
||||
|
||||
class Language(_Scalar):
    """Book language, emitted as ``dc:language``."""

    CLASS = 'language'
    # Set explicitly so the element is dc:language regardless of how the
    # base class computes its default TAG.
    TAG = DC + 'language'
|
||||
|
||||
class Publisher(_Scalar):
    """Publisher name, emitted as ``dc:publisher``."""

    CLASS = 'publisher'
    # Set explicitly so the element is dc:publisher regardless of how the
    # base class computes its default TAG.
    TAG = DC + 'publisher'
|
||||
|
||||
class Source(_Scalar):
    """Origin of the text, emitted as ``dc:source``."""

    CLASS = 'source'
    # Set explicitly so the element is dc:source regardless of how the base
    # class computes its default TAG.
    TAG = DC + 'source'
|
||||
|
||||
class Subject(_Scalar):
    """Subject / tag, emitted as ``dc:subject``."""

    # Lower-case like every sibling class (was 'Subject').
    CLASS = 'subject'
    # Set explicitly so the element is dc:subject regardless of how the base
    # class computes its default TAG.
    TAG = DC + 'subject'
|
||||
|
||||
class _Calibre(_Scalar):
    """Base for Calibre-specific fields, emitted as ``<meta>`` elements.

    Subclasses set ``TAG`` to the part of the name after ``calibre:``.
    """

    @property
    def element(self):
        """``<meta name="calibre:TAG" content="value"/>`` element."""
        # A property, like the _Scalar.element it overrides, so that
        # ``field.element`` yields an element for every field type.
        # Attribute values must be strings for lxml.
        return etree.Element('meta', name=f'calibre:{self.TAG}', content=str(self.value))
|
||||
|
||||
class Series(_Calibre):
    """Series name; its meta element is named ``calibre:series``."""

    TAG = 'series'
|
||||
|
||||
class Index(_Calibre):
    """Position within the series; meta name ``calibre:series_index``."""

    # Derives from _Calibre (was _Scalar) so the field is emitted as a
    # calibre meta element, like Series, rather than a bare element
    # called "series_index".
    TAG = 'series_index'
|
||||
|
||||
|
||||
|
||||
class Frontmatter:
    """Collected book metadata and its rendering as an XHTML title page.

    Keyword arguments become instance attributes.  ``creator`` and
    ``subject`` are additionally stored as the lists ``creators`` and
    ``subjects``; either a single item or a list/tuple may be passed.
    """

    creators: list[Author] | None = None
    description: Description | None = None
    identifier: Identifier | None = None
    index: Index | None = None
    language: Language | None = None
    published: Published | None = None
    publisher: Publisher | None = None
    series: Series | None = None
    source: Source | None = None
    subjects: list[Subject] | None = None
    title: Title | None = None

    def __init__(self, **kwargs):
        self.creators = self._as_list(kwargs['creator']) if 'creator' in kwargs else None
        self.subjects = self._as_list(kwargs['subject']) if 'subject' in kwargs else None
        # Applied last, so explicit 'creators' / 'subjects' keywords win.
        self.__dict__.update(kwargs)

    @staticmethod
    def _as_list(value):
        """Return *value* as a list without nesting an existing list/tuple."""
        return list(value) if isinstance(value, (list, tuple)) else [value]

    @property
    def as_title_page(self):
        """Build the title page document from the XHTML template.

        The page has a head (title and stylesheet link) and a body with one
        table; rows are added for creators, title, series/index and
        description, each only when the field is set.
        """
        doc = get_xhtml_template()
        root = doc.getroot()

        # The lxml builder accepts only strings and elements as children, so
        # metadata objects are converted with str() before being passed in.
        title_text = '' if self.title is None else str(self.title)
        root.append(
            E.head(
                E.title(title_text),
                E.link(href="../Styles/title-page.css", type="text/css", rel="stylesheet"),
            )
        )
        table_body = E.tbody()
        root.append(E.body(E.table(table_body), **{'class': 'title-page'}))

        if self.creators:
            # All creators share one cell, separated by line breaks.
            items = []
            for creator in self.creators:
                if items:
                    items.append(E.br())
                items.append(str(creator))
            # NOTE(review): this row and the sub-title row put the class on
            # <tr>, while the per-field rows put it on <td> - confirm
            # against the stylesheet.
            table_body.append(E.tr(E.td(*items), **{'class': 'author'}))

        if self.title is not None:
            table_body.append(self.title.as_title_page)

        if self.series:
            items = [str(self.series)]
            if self.index:
                items.extend((E.br(), str(self.index)))
            table_body.append(E.tr(E.td(*items), **{'class': 'sub-title'}))

        if self.description:
            table_body.append(self.description.as_title_page)

        return doc
|
||||
|
||||
49
markepub/normalize-md.py
Normal file
49
markepub/normalize-md.py
Normal file
@@ -0,0 +1,49 @@
|
||||
import re
|
||||
|
||||
|
||||
_SPACES = re.compile(r' +')
_SENTENCE_END = re.compile(r'([!?.])\s')
_QUOTED_SENTENCE_END = re.compile(r'([!?.])”\s')


def normalize_markdown(text: list[str]) -> str:
    """Normalize a Markdown document given as a list of lines.

    * A leading front-matter block (opened by a first line ``---`` and
      closed by the next ``---`` line) is kept verbatim.
    * Consecutive non-empty lines are joined with spaces into one
      paragraph; any number of empty lines separates paragraphs.
    * Runs of spaces collapse to one space, and a newline is inserted after
      ``.``, ``!`` or ``?`` (optionally followed by ``”``) when whitespace
      follows.

    Args:
        text: The document's lines, without trailing newlines.  The list is
            not modified.

    Returns:
        The normalized document; paragraphs are separated by one blank
        line.  An empty input yields an empty string.
    """
    # Work on a copy: the caller's list is neither consumed nor mutated,
    # and indexing replaces the quadratic ``pop(0)`` loops.
    lines = list(text)

    front_matter = []
    body_start = 0
    if lines and lines[0] == "---":
        try:
            closing = lines.index("---", 1)
        except ValueError:
            # Unterminated front matter: everything after the opener
            # belongs to it and there is no body.
            closing = len(lines)
        front_matter = lines[1:closing]
        body_start = closing + 1

    main_matter = []
    buffer = []
    for line in lines[body_start:]:
        if line:
            buffer.append(line)
        elif buffer:
            main_matter.append(' '.join(buffer))
            buffer = []
    if buffer:
        main_matter.append(' '.join(buffer))

    normalized = []
    for paragraph in main_matter:
        paragraph = _SPACES.sub(' ', paragraph)
        paragraph = _SENTENCE_END.sub(r'\1\n', paragraph)
        paragraph = _QUOTED_SENTENCE_END.sub(r'\1”\n', paragraph)
        normalized.append(paragraph)

    result = '---\n' + '\n'.join(front_matter) + '\n---\n\n' if front_matter else ''
    return result + '\n\n'.join(normalized)
|
||||
|
||||
|
||||
if __name__ == '__main__':
    # Rewrite Karen.md in place with its normalized content.
    with open('Karen.md', encoding='utf-8') as source:
        stripped_lines = [raw.rstrip() for raw in source]
    normalized = normalize_markdown(stripped_lines)

    with open('Karen.md', 'w', encoding='utf-8') as target:
        target.write(normalized)
|
||||
165
markepub/title_page.py
Normal file
165
markepub/title_page.py
Normal file
@@ -0,0 +1,165 @@
|
||||
from lxml.builder import E
|
||||
from lxml import etree
|
||||
import pandoc
|
||||
from pandoc.types import *
|
||||
from util import get_xhtml_template
|
||||
|
||||
|
||||
def class_(*args):
    """Return an attribute dict holding the space-joined CSS class names.

    Exists because ``class`` is a reserved word in Python and so cannot be
    passed as a keyword argument.
    """
    joined = ' '.join(args)
    return dict([("class", joined)])
|
||||
|
||||
|
||||
def _render_meta(value, **write_kwargs) -> str:
    """Render a pandoc meta value with ``pandoc.write`` and strip the result."""
    return pandoc.write(value, **write_kwargs).strip()


def _meta_text(value):
    """Return the text of a ``MetaInlines``, or of a ``MetaMap``'s 'text' entry.

    Returns None for any other value or when the rendered text is empty.
    """
    if isinstance(value, MetaInlines):
        return _render_meta(value) or None
    if isinstance(value, MetaMap) and 'text' in value[0]:
        return _render_meta(value[0]['text']) or None
    return None


def _author_names(creator) -> list[str]:
    """Collect author names from a scalar creator or a list of creators.

    In a list, a map entry with a 'role' other than 'aut' is skipped;
    entries without a role count as authors.
    """
    if not isinstance(creator, MetaList):
        name = _meta_text(creator)
        return [name] if name else []

    names = []
    for entry in creator[0]:
        if isinstance(entry, MetaMap) and 'role' in entry[0]:
            # The role is a pandoc meta value, not a str: render it before
            # comparing.  Comparing the raw object with 'aut' was always
            # unequal and therefore skipped every creator that had a role.
            if _render_meta(entry[0]['role']) != 'aut':
                continue
        name = _meta_text(entry)
        if name:
            names.append(name)
    return names


def _title_row(text: str, css_class: str):
    """Build a one-cell table row whose cell carries *css_class*."""
    return E.tr(E.td(text, class_(css_class)))


def make_title_page(pandoc_meta: Meta) -> etree.ElementTree:
    """Build an XHTML title page from pandoc document metadata.

    Rows are added, in this order, for the author(s) (``creator``), the
    ``title``, the ``sub-title`` and the ``description``, each only when
    present.  The head always holds a ``<title>`` (empty when the metadata
    has no title) and the stylesheet link; the body is only added when
    there is at least one row.

    Args:
        pandoc_meta: The document's ``Meta``; its first field is the
            metadata dict.

    Returns:
        The populated document tree from ``get_xhtml_template()``.
    """
    meta_dict = pandoc_meta[0]
    items = []

    names = _author_names(meta_dict['creator']) if 'creator' in meta_dict else []
    if names:
        # All authors share one cell, separated by line breaks.
        cell_content = []
        for name in names:
            if cell_content:
                cell_content.append(E.br())
            cell_content.append(name)
        items.append(E.tr(E.td(*cell_content, class_('author'))))

    title = _meta_text(meta_dict.get('title')) or ''
    if title:
        items.append(_title_row(title, 'title'))

    sub_title = _meta_text(meta_dict.get('sub-title'))
    if sub_title:
        items.append(_title_row(sub_title, 'sub-title'))

    description = meta_dict.get('description')
    if isinstance(description, MetaInlines):
        # NOTE(review): rendered as HTML but inserted as text, so any markup
        # pandoc emits will be escaped in the page - confirm this is intended.
        description_text = _render_meta(description, format='html5', options=["--wrap=none"])
        if description_text:
            items.append(_title_row(description_text, 'description'))

    doc = get_xhtml_template()
    root = doc.getroot()
    root.append(
        E.head(
            E.title(title),
            E.link(href="../Styles/title-page.css", type="text/css", rel="stylesheet"),
        )
    )
    if items:
        root.append(E.body(E.table(E.tbody(*items))))

    return doc
|
||||
|
||||
|
||||
def prettyprint(element, **kwargs):
    """Print *element* as pretty-printed XML, adding no extra newline.

    Extra keyword arguments are forwarded to ``etree.tostring``.
    """
    serialized = etree.tostring(element, pretty_print=True, **kwargs)
    text = serialized.decode()
    print(text, end='')
|
||||
|
||||
if __name__ == '__main__':
    # Manual smoke test: render a title page from hand-written metadata.
    sample_meta = Meta({
        'creator': MetaList([
            MetaMap({
                'file-as': MetaInlines([Str('Riter,'), Space(), Str('E.Z.')]),
                'role': MetaInlines([Str('aut')]),
                'text': MetaInlines([Str('E.Z.'), Space(), Str('Riter')]),
            }),
        ]),
        'description': MetaInlines([
            Str('Karen'), Space(), Str('meets'), Space(), Str('the'), Space(),
            Str('man'), Space(), Str('she'), Space(), Str('can’t'), Space(),
            Str('resist'), Space(), Str('—'), Space(), Str('the'), Space(),
            Str('man'), Space(), Str('who'), Space(), Str('can'), Space(),
            Str('do'), Space(), Str('anything'), Space(), Str('to'), Space(),
            Str('her,'), Space(), Str('and'), Space(), Str('she'), Space(),
            Str('will'), Space(), Str('love'), Space(), Str('it'), Space(),
            Str('—'), Space(), Str('and'), Space(), Str('him.'),
        ]),
        'language': MetaInlines([Str('en')]),
        'published': MetaInlines([Str('2002-07-24')]),
        'source': MetaInlines([Str('https://www.bdsmlibrary.info/stories/story.php?storyid=1101')]),
        'subject': MetaList([
            MetaInlines([Str('M/f')]),
            MetaInlines([Str('pregnant')]),
            MetaInlines([Str('spanking')]),
            MetaInlines([Str('D/s')]),
            MetaInlines([Str('real')]),
            MetaInlines([Str('reluctant')]),
            MetaInlines([Str('Serious')]),
        ]),
        'title': MetaMap({
            'file-as': MetaInlines([Str('Karen')]),
            'text': MetaInlines([Str('Karen')]),
        }),
    })

    title_page = make_title_page(sample_meta)

    serialized = etree.tostring(title_page, pretty_print=True, xml_declaration=True, encoding='utf-8')
    print(serialized.decode(), end='')
|
||||
9
markepub/util.py
Normal file
9
markepub/util.py
Normal file
@@ -0,0 +1,9 @@
|
||||
from lxml import etree
|
||||
|
||||
def get_xhtml_template():
    """Return a new element tree holding an empty XHTML 1.1 ``<html>`` root.

    The source markup carries an XML declaration and the XHTML 1.1 DOCTYPE;
    the root element is in the XHTML namespace and has no children.
    """
    markup = (
        '<?xml version="1.0"?>'
        '<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.1//EN"'
        ' "http://www.w3.org/TR/xhtml11/DTD/xhtml11.dtd">'
        '<html xmlns="http://www.w3.org/1999/xhtml"></html>'
    )
    root = etree.XML(markup)
    return etree.ElementTree(root)
|
||||
10
pyproject.toml
Normal file
10
pyproject.toml
Normal file
@@ -0,0 +1,10 @@
|
||||
[project]
|
||||
name = "markepub"
|
||||
version = "0.1.0"
|
||||
description = "Markdown-to-EPUB helpers: metadata front matter, title pages and Markdown normalization"
|
||||
requires-python = ">=3.13"
|
||||
dependencies = [
|
||||
"lxml>=6.0.1",
|
||||
"pandoc>=2.4",
|
||||
"pyyaml>=6.0.2",
|
||||
]
|
||||
Reference in New Issue
Block a user