summaryrefslogtreecommitdiff
path: root/gmi2rss
blob: c0ecbf5def2f3e9e9a802cdeffc0da509b7bb933 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
#!/usr/bin/env python3
"""
Copyright (c) 2021 Josias
You may use this software under the terms of the MIT license (see LICENSE for more information).
"""
from datetime import datetime
import dateutil
import os
import re
import string
import sys

from feedgen.feed import FeedGenerator

def find_links(document):
    """Return every link line of a gemtext document, in document order.

    A link line is any line that begins with the ``=>`` marker.  Gemtext
    allows the marker to be followed by spaces, tabs, or no whitespace at
    all, so only the marker itself is required here (the same test that
    ``is_feed_link`` applies).  A trailing carriage return left over from
    CRLF line endings is removed so it cannot leak into fields parsed from
    the line later on.

    Args:
        document: The complete gemtext document as one string.

    Returns:
        A list of the matching lines, without their line terminators.
    """
    lines = (raw.rstrip('\r') for raw in document.split('\n'))
    return [line for line in lines if line.startswith('=>')]

def parse_feed_link(line):
    """Split a feed link line into its link, date, and title.

    A feed link line looks like ``=> URL YYYY-MM-DD - Title``.  The token
    that follows the date is dropped only when it is a pure separator
    (``-``, ``:`` and similar punctuation); if it contains a letter or a
    digit it is the first word of the title and is kept, so
    ``=> URL 2021-05-01 Hello world`` yields the title ``Hello world``.

    Args:
        line: A single gemtext line.

    Returns:
        A dict with the keys ``'link'`` (second whitespace-separated
        field), ``'date'`` (third field, as written) and ``'title'`` (the
        remaining words joined by single spaces).

    Raises:
        ValueError: If ``is_feed_link(line)`` is false.
    """
    if not is_feed_link(line):
        raise ValueError("Line not a valid feed link")

    parts = re.split(r'\s+', line)

    # Everything after the date belongs to the label; strip one leading
    # separator token if there is one.
    label = parts[3:]
    if label and not any(ch.isalnum() for ch in label[0]):
        label = label[1:]

    return {
            'link': parts[1],
            'date': parts[2],
            'title': ' '.join(label),
            }

def is_feed_link(line):
    """Tell whether a line is a gemini feed link: ``=> URL YYYY-MM-DD ...``.

    The line must start with ``=>``, split into at least five
    whitespace-separated fields, and carry a real calendar date written as
    ``YYYY-MM-DD`` in the third field.  Strings that merely look date-like
    (``----------``, ``1234567890``, ``2021-13-45``) are rejected.

    Args:
        line: A single gemtext line.

    Returns:
        True if the line is a feed link, False otherwise.
    """
    if not line.startswith("=>"):
        return False

    # ['=>', 'gemini://example.com', '1970-01-01', '-', 'Epoch']
    parts = re.split(r'\s+', line)
    if len(parts) < 5:
        return False

    date = parts[2]

    # Shape check first: exactly four, two and two ASCII digits.
    if not re.fullmatch(r'[0-9]{4}-[0-9]{2}-[0-9]{2}', date):
        return False

    # Then make sure the month and day actually exist.
    try:
        datetime.strptime(date, '%Y-%m-%d')
    except ValueError:
        return False

    return True

def main():
    """Convert a gemtext index page into an Atom feed printed on stdout.

    Command line: ``gmi2rss FILE URL``.  FILE is the gemtext document to
    read (decoded as UTF-8) and URL is the feed's own address; it is used
    as the feed id, as the ``self`` link, and as the base that relative
    entry links are joined onto.

    The feed title is taken from the first ``# `` heading.  The subtitle is
    taken from a ``## `` heading only if it is the first non-blank line
    after the title.  Every link line accepted by ``is_feed_link`` becomes
    one entry.
    """
    if len(sys.argv) < 3:
        # Both FILE and URL are required.  The message goes to stderr so it
        # cannot end up inside a redirected feed file.
        print("Usage: gmi2rss FILE URL", file=sys.stderr)
        return

    # URLs always use '/' as separator, whatever the host OS is.
    import posixpath

    with open(sys.argv[1], encoding='utf-8') as file:
        doc = file.read()

    root = sys.argv[2]

    title = ""
    subtitle = ""
    # Currently assumes whitespace after # or ##
    for line in doc.split('\n'):
        if line.startswith('# '):
            title = ' '.join(re.split(r'\s+', line)[1:])
        elif title != "" and line.startswith("## "):
            subtitle = ' '.join(re.split(r'\s+', line)[1:])
            break
        elif title != "" and line != "":
            # Some other content follows the title: there is no subtitle.
            break

    feed_links = [parse_feed_link(link) for link in find_links(doc) if is_feed_link(link)]

    fg = FeedGenerator()
    fg.title(title)
    fg.subtitle(subtitle)
    fg.id(root)
    fg.link(href=root, rel="self")

    latest_date = "1970-01-01T00:00:00Z" # the beginning of the universe, as far as we're concerned
    for link in feed_links:
        fe = fg.add_entry()
        fe.title(link['title'])

        url = link['link']
        # A link that already carries a scheme (gemini://, https://, ...)
        # is absolute and must be used as written; only relative links are
        # joined onto the feed root.
        if not re.match(r'[A-Za-z][A-Za-z0-9+.-]*:', url):
            url = posixpath.join(root, url)
        fe.id(url)
        fe.link(href=url, rel="alternate")

        # Entries carry a date only; noon UTC is used as the time of day.
        date = link['date'] + 'T12:00:00Z'
        # Timestamps share one fixed layout, so comparing them as plain
        # strings orders them chronologically.
        if date > latest_date:
            latest_date = date
        fe.updated(date)

    # Leave the feed's updated field alone when there were no entries.
    if latest_date != "1970-01-01T00:00:00Z":
        fg.updated(latest_date)

    print(fg.atom_str(pretty=True).decode('UTF-8'), end='')
    
# Run the conversion only when this file is executed as a script, not when
# it is imported as a module.
if __name__ == "__main__":
    main()