72 lines
2.7 KiB
Python
72 lines
2.7 KiB
Python
|
import os
|
||
|
import json
|
||
|
import markdownify
|
||
|
from datetime import datetime
|
||
|
from collections import defaultdict
|
||
|
|
||
|
def _load_posts(base_folder):
    """Return every JSON document found under *base_folder*, recursively.

    Files whose name ends with ``.likes.json`` are skipped. Every other file
    is parsed as JSON; a file that is not valid JSON raises
    ``json.JSONDecodeError``.
    """
    posts = []
    for root, _dirs, files in os.walk(base_folder):
        for file_name in files:
            if file_name.endswith('.likes.json'):
                continue  # Skip files ending with .likes.json

            file_path = os.path.join(root, file_name)
            # JSON text is UTF-8; do not depend on the platform's default codec.
            with open(file_path, 'r', encoding='utf-8') as file:
                posts.append(json.load(file))
    return posts


def _author_handle(attributed_to_url):
    """Format an ``attributedTo`` URL as ``username@domain``.

    The username is the last ``/``-separated segment and the domain is the
    third one (the host of a ``scheme://host/...`` URL). An empty or missing
    URL yields ``"Unknown User"``.
    """
    if not attributed_to_url:
        return "Unknown User"
    parts = attributed_to_url.split('/')
    username = parts[-1] if len(parts) > 1 else ""
    domain = parts[2] if len(parts) > 2 else ""
    return f"{username}@{domain}"


def export_posts_to_markdown(base_folder):
    """Render the posts stored under *base_folder* as one Markdown document.

    The document starts with a ``# Posts from <author>`` title, where the
    author is derived from the ``attributedTo`` field of the first post in
    sort order. Posts are sorted by their ``published`` string and grouped
    under ``## <Month Year>`` headings. Each post's HTML ``content`` is
    converted with ``markdownify`` and followed by a ``**Published:**`` line.

    Posts whose ``published`` field is missing, empty or ``null`` are not
    rendered, but they still take part in the sort and may therefore be the
    post the author is read from.

    Args:
        base_folder: Directory that is walked recursively for post files.

    Returns:
        The Markdown text, or an empty string when no post files were found.

    Raises:
        json.JSONDecodeError: If a visited file is not valid JSON.
        ValueError: If a ``published`` value is not an ISO 8601 timestamp.
    """
    posts = _load_posts(base_folder)
    if not posts:
        return ""

    # ``or ''`` also covers an explicit JSON null, which would otherwise make
    # the sort compare None with str and raise TypeError.
    posts.sort(key=lambda post: post.get('published') or '')

    # Group posts by month and year. Dicts keep insertion order, so the
    # headings follow the sort order established above.
    grouped_posts = defaultdict(list)
    for post in posts:
        published = post.get('published')
        if not published:
            continue
        # A trailing "Z" is stripped because datetime.fromisoformat() only
        # accepts it from Python 3.11 onwards.
        published_at = datetime.fromisoformat(published.rstrip("Z"))
        month_year = published_at.strftime("%B %Y")
        grouped_posts[month_year].append((published_at, post))

    author = _author_handle(posts[0].get('attributedTo', ''))
    chunks = [f"# Posts from {author}\n\n\n"]

    for month_year, entries in grouped_posts.items():
        chunks.append(f"## {month_year}\n\n")  # Month-Year header

        for published_at, post in entries:
            # Convert HTML content to Markdown and strip trailing newlines
            content = markdownify.markdownify(post.get('content', '')).rstrip('\n')
            formatted_published = published_at.strftime("%m-%d-%y %H:%M:%S")

            chunks.append(f"\n{content}\n\n")  # Two newlines after the content
            chunks.append(f"**Published:** {formatted_published}\n\n")

    return "".join(chunks)
|
||
|
|
||
|
# Example usage: export the posts stored under `base_folder` and print them.
base_folder = 'app/.data/posts/'
markdown_content = export_posts_to_markdown(base_folder)

# Write the generated Markdown document to standard output.
print(markdown_content)
|