VMProtect/help/sync.py

317 lines
7.7 KiB
Python

#!/usr/bin/python
import codecs
import json
import os
import re
import sys
import time
import xml.parsers.expat

import pyblog
SRC_PATH = "."
STRUCT_FILE = sys.argv[1]
##########################################################################################
# delete old pages
def find_user_manual_id(root_title, all_pages):
    """Return the integer id of the first page titled ``root_title``.

    Args:
        root_title: title to look for (compared with ``==``).
        all_pages: iterable of mappings read through their 'page_title'
            and 'page_id' keys.

    Returns:
        ``int(page['page_id'])`` of the first matching entry.

    Raises:
        Exception: when no entry carries the requested title.
    """
    candidates = (entry for entry in all_pages if entry['page_title'] == root_title)
    found = next(candidates, None)
    if found is None:
        raise Exception('Can\'t find user manual page')
    return int(found['page_id'])
def find_children(all, root_id):
    """Collect the ids of every page located below ``root_id``.

    The page list is scanned once per level: first the pages whose parent is
    ``root_id``, then the pages whose parent is one of those, and so on until
    a level turns out empty.

    Args:
        all: sequence of mappings read through their 'page_id' and
            'page_parent_id' keys (both converted with ``int``).
        root_id: integer id of the page whose descendants are wanted.

    Returns:
        List of descendant ids, nearest level first; ``root_id`` itself is
        not included.
    """
    descendants = []
    frontier = [root_id]
    while frontier:
        next_level = []
        for entry in all:
            page_id, parent_id = int(entry['page_id']), int(entry['page_parent_id'])
            if parent_id in frontier:
                next_level.append(page_id)
        # The level just scanned is recorded only after its children are known.
        descendants.extend(frontier)
        frontier = next_level
    # The starting page was recorded as the first level; drop it again.
    descendants.remove(root_id)
    return descendants
def delete_pages(blog, pages):
    """Ask the blog to delete every page id in ``pages``, one call per id.

    A failure on one page is printed and the loop moves on to the next id.

    Args:
        blog: object exposing ``delete_page(page_id)``.
        pages: iterable of page ids; each id is formatted with ``%d`` in the
            warning message.
    """
    for page_id in pages:
        try:
            blog.delete_page(page_id)
        except pyblog.BlogError as text:
            print ("warning while attempting to delete the page %d: %s" % (page_id, text))
        # Needs the module-level ``import xml.parsers.expat``; without it this
        # clause itself raises NameError and hides the real error.
        except xml.parsers.expat.ExpatError as text:
            print ("xerror: %s" % (text))
##########################################################################################
# create new pages
def load_structure():
    """Parse the UTF-8 JSON file named by ``STRUCT_FILE`` and return the result.

    The file is opened in a ``with`` block so the handle is closed as soon as
    parsing finishes (or fails).
    """
    with codecs.open(STRUCT_FILE, 'r', 'utf-8') as f:
        return json.load(f)
def get_good_image_link(link):
    """Map an image path to ``/usermanual/<last path component>``.

    Only ``/`` is treated as a separator; a link without any ``/`` is used
    whole.
    """
    file_name = link.rsplit('/', 1)[-1]
    return "/usermanual/" + file_name
def remove_newlines(text):
    """Collapse each paragraph onto one line while leaving <pre> blocks intact.

    Outside ``<pre>``: every line is stripped, consecutive non-empty lines are
    joined with a single space, and an empty line ends the current paragraph.
    A line containing ``<pre`` ends the current paragraph and is emitted
    (stripped) after an empty line. Lines that follow it are copied verbatim
    until one contains ``</pre``; an empty line is emitted after that closing
    line.

    A ``<pre>`` that is opened and closed on the same line is treated as a
    complete block, so the text after it is processed as ordinary paragraphs
    again.

    Args:
        text: the markup to reflow.

    Returns:
        The reflowed text, with output lines joined by ``"\\n"``.
    """
    out = []
    paragraph = []  # stripped lines of the paragraph being collected
    in_pre = False

    def flush():
        # Emit the pending paragraph (if any) as a single space-joined line.
        if paragraph:
            out.append(" ".join(paragraph))
            del paragraph[:]

    for raw in text.splitlines():
        if in_pre:
            # Preformatted content is copied untouched, indentation included.
            out.append(raw)
            if "</pre" in raw:
                in_pre = False
                out.append("")
            continue

        stripped = raw.strip()
        if "<pre" in stripped:
            flush()
            out.append("")
            out.append(stripped)
            # The block is already closed when a "</pre" follows the last
            # "<pre" on this very line.
            if stripped.rfind("</pre") > stripped.rfind("<pre"):
                out.append("")
            else:
                in_pre = True
            continue

        if stripped == "":
            flush()
            continue

        paragraph.append(stripped)

    flush()
    return "\n".join(out)
def filter_content(text):
    """Prepare a page body for upload.

    Steps, in order:
      1. reflow the text with ``remove_newlines``;
      2. rewrite the ``src`` value of every ``<img>`` tag through
         ``get_good_image_link``;
      3. strip the trailing footer matched by the ``<br>``/``<hr>``/
         ``<div ...Copyright...`` pattern.

    Only the ``src`` attribute values themselves are rewritten, so one image
    path that happens to be a substring of another (or of ordinary text) is
    not altered by accident.

    Args:
        text: the HTML fragment to clean up.

    Returns:
        The cleaned-up fragment.
    """
    text = remove_newlines(text)

    def _relink(match):
        # group(1) = everything up to the opening quote, group(2) = src value.
        return match.group(1) + get_good_image_link(match.group(2)) + match.group(3)

    # adjust images
    text = re.sub(r'(<img.*?src\s*=\s*")([^"]*)(")', _relink, text, flags=re.M | re.DOTALL)
    # remove footer (raw string: "\s" is not a valid escape in a plain literal)
    text = re.sub(r'([\r\n\s]*<br[^>]*>[\r\n\s]*)+<hr[^>]*>[\s\n\r]*<div.*Copyright.+$', '', text)
    return text
def load_single_page(p):
    """Read the HTML file of one page and store its title and content in ``p``.

    The file ``p['file']`` is read (UTF-8) relative to ``SRC_PATH``. The text
    between ``<title>`` and ``</title>`` replaces ``p['title']`` when present;
    ``&quot;`` in the resulting title is turned into ``"``. The text between
    ``<body>`` and ``</body>`` is passed through ``filter_content`` and stored
    as ``p['content']``; when no such pair is found the literal "not found" is
    used instead.

    Args:
        p: mapping with at least the 'file' and 'title' keys; updated in place.
    """
    print ("\nprocessing page %s" % (p['file']))
    # ``with`` closes the handle right after reading.
    with codecs.open(os.path.join(SRC_PATH, p['file']), 'r', 'utf-8') as f:
        content = f.read()

    title = p['title']
    m_title = re.search('<title>(.*)</title>', content, re.M + re.DOTALL)
    if m_title:
        title = m_title.group(1)
    p['title'] = title.replace('&quot;', '"')

    m_content = re.search('<body>(.*)</body>', content, re.M + re.DOTALL)
    body = m_content.group(1) if m_content else "not found"
    p['content'] = filter_content(body)
def load_pages_content(pages):
    """Run ``load_single_page`` on every page of the tree, parents first.

    Args:
        pages: iterable of page mappings; an optional 'children' entry holds
            the nested pages, which are processed right after their parent.
    """
    for node in pages:
        load_single_page(node)
        if 'children' not in node:
            continue
        load_pages_content(node['children'])
def print_pages_content(pages):
    """Print file, title and ``repr`` of the content for each page in the tree.

    Args:
        pages: iterable of page mappings with 'file', 'title' and 'content'
            keys; an optional 'children' entry is printed recursively.
    """
    for node in pages:
        print ("file %s" % (node['file']))
        details = (node['file'], node['title'], repr(node['content']))
        print ("page %s, title = %s, content:\n%s\n\n" % details)
        if 'children' not in node:
            continue
        print_pages_content(node['children'])
def create_single_page(blog, title, content, parent, order):
    """Create one published page below ``parent`` through ``blog.new_page``.

    Args:
        blog: object exposing ``new_page(query)``.
        title: page title.
        content: page body, sent as 'description'.
        parent: id sent as 'wp_page_parent_id'.
        order: value sent as 'wp_page_order'.

    Returns:
        Whatever ``blog.new_page`` returns, or ``None`` after one of the
        handled errors has been printed.
    """
    print ("creating page: %s" % (title))
    query = {
        'wp_page_parent_id': parent,
        'title': title,
        'description': content,
        'mt_allow_comments': 0,
        'mt_allow_pings': 0,
        'publish': 1,
        'wp_page_order': order,
    }
    try:
        return blog.new_page(query)
    except pyblog.BlogError as text:
        print ("error: %s" % (text))
    # Needs the module-level ``import xml.parsers.expat``; without it this
    # clause itself raises NameError and hides the real error.
    except xml.parsers.expat.ExpatError as text:
        print ("xerror: %s" % (text))
    return None
def create_new_pages(blog, root, pages):
    """Create every page of the tree below ``root`` and record id and link.

    Pages are created in list order; the position in the list is passed as
    the page order. After creation the page is fetched back with
    ``blog.get_page`` and its 'link' value is stored on the page mapping.
    Children are created below their freshly created parent.

    Args:
        blog: object exposing ``get_page(page_id)`` (and whatever
            ``create_single_page`` needs).
        root: id of the page the top-level entries are attached to.
        pages: list of page mappings with 'title' and 'content' keys and an
            optional 'children' list; each gets 'id' and 'link' added.
    """
    for order, node in enumerate(pages):
        page_id = create_single_page(blog, node['title'], node['content'], root, order)
        # NOTE(review): create_single_page returns None after a handled error;
        # the lookup below is then attempted with None - confirm how the blog
        # client reacts to that.
        wp_page = blog.get_page(page_id)
        node['id'] = page_id
        node['link'] = wp_page['link']
        if 'children' in node:
            create_new_pages(blog, page_id, node['children'])
def update_single_page(blog, page):
    """Send the current title and content of ``page`` to ``blog.edit_page``.

    Handled errors are printed; nothing is returned.

    Args:
        blog: object exposing ``edit_page(page_id, query)``.
        page: mapping with 'id', 'title' and 'content' keys.
    """
    print ("updating page %s" % (page['title']))
    query = {'title': page['title'], 'description': page['content']}
    try:
        blog.edit_page(page['id'], query)
    except pyblog.BlogError as text:
        print ("error: %s" % (text))
    # Needs the module-level ``import xml.parsers.expat``; without it this
    # clause itself raises NameError and hides the real error.
    except xml.parsers.expat.ExpatError as text:
        print ("xerror: %s" % (text))
def update_pages(blog, pages):
    """Update every page of the tree, pausing ten seconds after each one.

    Args:
        blog: passed through to ``update_single_page``.
        pages: iterable of page mappings; an optional 'children' entry is
            processed after its parent (and after the pause).
    """
    for node in pages:
        update_single_page(blog, node)
        time.sleep(10)
        if 'children' not in node:
            continue
        update_pages(blog, node['children'])
def find_page_in_all_exact(fname, all_pages):
    """Return the 'link' of the page whose 'file' equals ``fname`` exactly.

    The tree is searched depth-first in list order. A match found in a
    subtree is only accepted when its link is truthy; otherwise the search
    continues with the next sibling.

    Returns:
        The link of the match, or ``None`` when nothing matches.
    """
    for node in all_pages:
        if node['file'] == fname:
            return node['link']
        if 'children' not in node:
            continue
        nested = find_page_in_all_exact(fname, node['children'])
        if nested:
            return nested
    return None
def find_page_in_all_by_name(fname, all_pages):
    """Return the 'link' of the first page whose file name matches ``fname``.

    Only the last path component (``os.path.basename``) of both names is
    compared. The tree is searched depth-first in list order; a subtree
    result is only accepted when it is truthy.

    Returns:
        The link of the match, or ``None`` when nothing matches.
    """
    for node in all_pages:
        same_name = os.path.basename(node['file']) == os.path.basename(fname)
        if same_name:
            return node['link']
        if 'children' not in node:
            continue
        nested = find_page_in_all_by_name(fname, node['children'])
        if nested:
            return nested
    return None
def find_target(fname, link, all_new_pages):
    """Resolve a relative ``link`` found in file ``fname`` to a page link.

    The link is joined to the directory of ``fname``, normalised, and
    converted to forward slashes. That path is looked up first by exact
    'file' value, then by bare file name.

    Returns:
        The link of the page found, or the literal "--- unknown link ---"
        when neither lookup yields a truthy result.
    """
    base_dir = os.path.dirname(fname)
    candidate = os.path.normpath(os.path.join(base_dir, link)).replace('\\', '/')
    resolved = find_page_in_all_exact(candidate, all_new_pages)
    if not resolved:
        resolved = find_page_in_all_by_name(candidate, all_new_pages)
    if not resolved:
        return "--- unknown link ---"
    return resolved
def process_links_on_page(page, all_new_pages):
    """Rewrite the local ``href`` targets in ``page['content']`` in place.

    For every ``<a ... href="...">`` the part of the target before any ``#``
    is examined:
      * empty targets (pure ``#anchor`` links) are left alone;
      * targets starting with ``http:``, ``https:`` or ``mailto:`` are left
        alone;
      * anything else is treated as a path relative to ``page['file']`` and
        replaced by the result of ``find_target``.

    Only the matched ``href`` value is replaced, so identical text elsewhere
    in the page is not touched.

    Args:
        page: mapping with 'file' and 'content' keys; 'content' is updated.
        all_new_pages: the whole page tree, passed through to ``find_target``.
    """
    def _rewrite(match):
        # group(1) = tag text up to and including the opening quote,
        # group(2) = href value up to the first '"' or '#'.
        link = match.group(2)
        if link == "":
            return match.group(0)
        normalized = link.replace('\\', '/')
        if re.match(r"(https?|mailto):", normalized):
            return match.group(0)
        return match.group(1) + find_target(page['file'], normalized, all_new_pages)

    page['content'] = re.sub(
        r'(<a[^>]*href\s*=\s*")([^"#]*)', _rewrite, page['content'], flags=re.DOTALL)
def process_links(pages, all_new_pages):
    """Apply ``process_links_on_page`` to every page of the tree.

    Args:
        pages: iterable of page mappings to rewrite; an optional 'children'
            entry is processed right after its parent.
        all_new_pages: the complete tree used for resolving link targets.
    """
    for node in pages:
        process_links_on_page(node, all_new_pages)
        if 'children' not in node:
            continue
        process_links(node['children'], all_new_pages)
def get_list_of_pages(pages):
    """Render the page tree as nested ``<ul class=toc>`` markup.

    Each page becomes one ``<li>`` holding a link built from its 'link' and
    'title' values; the list of its children, when present, follows that
    ``<li>`` directly.

    Returns:
        The markup as a single string.
    """
    pieces = ["\n<ul class=toc>\n"]
    for node in pages:
        pieces.append('<li><a href="' + node['link'] + '">' + node['title'] + '</a></li>\n')
        if 'children' in node:
            pieces.append(get_list_of_pages(node['children']))
    pieces.append("</ul>\n")
    return "".join(pieces)
def update_main_page(blog, id, pages):
    """Replace the body of page ``id`` with a heading plus table of contents.

    The body consists of a fixed header (heading, intro line, inline style)
    followed by the markup produced by ``get_list_of_pages``. A
    ``pyblog.BlogError`` raised by the update is printed, not propagated.

    Args:
        blog: object exposing ``edit_page(page_id, query)``.
        id: id of the page to overwrite.
        pages: page tree rendered into the table of contents.
    """
    print ('updating the main user manual page')
    header = '''
<h1>User Manual</h1>
The table of contents:
<style> ul.toc li { margin-left: 2em; } #content>ul { padding-left: 0; } </style>
'''
    body = header + get_list_of_pages(pages)
    try:
        blog.edit_page(id, {'description': body})
    except pyblog.BlogError as text:
        print ("error: %s" % (text))
##########################################################################################
# main code
# Visual separator printed before any work starts.
print ("------------------")
# Sample input for remove_newlines(); within the visible file it is only
# referenced by the disabled debug lines that follow.
test = '''
one
<pre>
two
three
four</pre>
more
some text
<pre>
some text
one more
</pre>
a
b
c
'''
# Disabled manual check of remove_newlines() on the sample above
# (written with Python 2 print statements).
#print test
#test = remove_newlines(test)
#print "------------------"
#print test
#quit()
# Step 1: read the JSON structure and load title/content for every page
# listed under its 'children' key.
struct = load_structure()
new_pages = struct['children']
load_pages_content(new_pages)
# Disabled debug dump of the loaded pages.
#print_pages_content(new_pages)
#quit()
# Disabled alternative endpoint.
# blog = pyblog.WordPress('http://test.vmpsoft.com/xmlrpc.php', 'admin', '12345')
# NOTE(review): the account name and password are hard-coded here and the
# endpoint URL is plain http - consider loading them from the environment or
# a config file kept out of version control; confirm with the site owner.
blog = pyblog.WordPress('http://vmpsoft.com/xmlrpc.php', 'uploader', 'lcRn4F29Rr4S')
# Step 2: locate the root page (by the 'root_title' of the structure file)
# and every page currently below it.
all_pages = blog.get_page_list()
user_manual_id = find_user_manual_id(struct['root_title'], all_pages)
print ("user manual page has id %d" % (user_manual_id))
user_manual_children = find_children(all_pages, user_manual_id)
print ("user manual children pages are %s\ndeleting..." % (user_manual_children))
# Step 3: delete the old pages. The same id list is submitted twice; per the
# inline notes the first pass moves pages to the trash and the second removes
# them from it.
delete_pages(blog, user_manual_children) # to trash
delete_pages(blog, user_manual_children) # from trash
print ("done, ready to create the new structure")
# Step 4: create the new pages first so that each one has a 'link', then
# rewrite the links inside the page contents and push the rewritten
# contents back to the blog.
create_new_pages(blog, user_manual_id, new_pages)
process_links(new_pages, new_pages)
update_pages(blog, new_pages)
# Step 5: regenerate the table of contents on the root page.
update_main_page(blog, user_manual_id, new_pages)