diff --git a/escape_hatch.py b/escape_hatch.py new file mode 100644 index 0000000..675a07a --- /dev/null +++ b/escape_hatch.py @@ -0,0 +1,71 @@ +import os +import json +import markdownify +from datetime import datetime +from collections import defaultdict + +def export_posts_to_markdown(base_folder): + posts = [] + + for root, dirs, files in os.walk(base_folder): + for file_name in files: + if file_name.endswith('.likes.json'): + continue # Skip files ending with .likes.json + + file_path = os.path.join(root, file_name) + with open(file_path, 'r') as file: + post = json.load(file) + posts.append(post) + + # Sort posts by the 'published' date + posts.sort(key=lambda x: x.get('published', '')) + + # Group posts by month and year + grouped_posts = defaultdict(list) + for post in posts: + published = post.get('published', '') + if published: + month_year = datetime.fromisoformat(published.rstrip("Z")).strftime("%B %Y") + grouped_posts[month_year].append(post) + + markdown_output = "" + # Check if there are any posts to determine the author + if posts: + # Extract and format the 'attributedTo' field as 'username@domain' + first_post = posts[0] + attributed_to_url = first_post.get('attributedTo', '') + if attributed_to_url: + parts = attributed_to_url.split('/') + username = parts[-1] if len(parts) > 1 else "" + domain = parts[2] if len(parts) > 2 else "" + attributed_to = f"{username}@{domain}" + else: + attributed_to = "Unknown User" + markdown_output += f"# Posts from {attributed_to}\n\n\n" + + for month_year, posts in grouped_posts.items(): + markdown_output += f"## {month_year}\n\n" # Month-Year header + + for post in posts: + # Convert HTML content to Markdown and strip trailing newlines + content = markdownify.markdownify(post.get('content', '')).rstrip('\n') + published = post.get('published', '') + + # Convert and format the 'published' date + if published: + published_datetime = datetime.fromisoformat(published.rstrip("Z")) + formatted_published = published_datetime.strftime("%m-%d-%y %H:%M:%S") + else: + formatted_published = "Unknown Date" + + # Format and add the post to the markdown output with proper newlines + markdown_output += f"\n{content}\n\n" # Ensure two newlines after the content + markdown_output += f"**Published:** {formatted_published}\n\n" + + return markdown_output + +# Example usage of the function +base_folder = 'app/.data/posts/' + +markdown_content = export_posts_to_markdown(base_folder) +print(markdown_content) # This should print the markdown content