-
Notifications
You must be signed in to change notification settings - Fork 0
/
parse_rss.py
59 lines (51 loc) · 1.7 KB
/
parse_rss.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
from lxml import etree
import sqlite3
from datetime import datetime
def parse_rss2(file_desc):
    """Parse an RSS 2.0 document into a list of entry dicts.

    Args:
        file_desc: Source of the feed; passed unchanged to ``etree.parse``.

    Returns:
        A list with one dict per ``<item>`` under ``<channel>``, in document
        order. Each dict has the keys ``"title"``, ``"url"`` (from
        ``<link>``), ``"guid"`` and ``"pub_date"``. A text value is ``None``
        when its element is absent or empty; ``"pub_date"`` is a naive
        ``datetime`` or ``None`` when the date is missing or unparseable.
        A document without a ``<channel>`` element yields an empty list.
    """
    root = etree.parse(file_desc).getroot()
    channel = root.find("channel")
    if channel is None:
        return []
    return [
        {
            "title": _child_text(item, "title"),
            "url": _child_text(item, "link"),
            "guid": _child_text(item, "guid"),
            "pub_date": _parse_pub_date(_child_text(item, "pubDate")),
        }
        for item in channel.findall("item")
    ]


def _child_text(item, tag):
    """Return the text of the first *tag* child of *item*, or None if absent."""
    child = item.find(tag)
    return None if child is None else child.text


def _parse_pub_date(raw):
    """Convert an RSS ``pubDate`` string to a naive datetime, or None."""
    if raw is None:
        return None
    text = raw.strip()
    try:
        # %Z only accepts zone names ("GMT", "UTC", local names) and yields
        # a naive datetime.
        return datetime.strptime(text, "%a, %d %b %Y %H:%M:%S %Z")
    except ValueError:
        pass
    try:
        # Numeric offsets such as "+0000" need %z, which yields an aware value.
        aware = datetime.strptime(text, "%a, %d %b %Y %H:%M:%S %z")
    except ValueError:
        return None
    # Normalise to naive UTC so the result is comparable with the "GMT" path.
    return (aware - aware.utcoffset()).replace(tzinfo=None)
def create_db(db):
    """Create the ``entries`` table in the SQLite database at *db*.

    The call is idempotent: if the table already exists it is left untouched.

    Args:
        db: Database path, passed unchanged to ``sqlite3.connect``.
    """
    schema = """
    CREATE TABLE IF NOT EXISTS entries (
        id integer primary key autoincrement not null,
        title text,
        url text,
        guid text not null,
        tweeted boolean,
        pub_date timestamp
    );
    """
    conn = sqlite3.connect(db)
    try:
        conn.execute(schema)
        conn.commit()
    finally:
        # Always release the file handle, even if the DDL fails.
        conn.close()
def push_db(entries, db):
    """Insert entries whose ``guid`` is not yet stored in the database.

    Each new row is written with ``tweeted`` set to ``False``. All inserts
    are committed together; if any statement fails the whole batch is rolled
    back and the exception propagates.

    Args:
        entries: Iterable of dicts with the keys ``"title"``, ``"url"``,
            ``"guid"`` and ``"pub_date"``.
        db: Database path, passed unchanged to ``sqlite3.connect``.
    """
    conn = sqlite3.connect(db)
    try:
        # The connection context manager commits on success and rolls back
        # on error; it does not close the connection.
        with conn:
            for entry in entries:
                already_stored = conn.execute(
                    "SELECT 1 FROM entries WHERE guid = ? LIMIT 1",
                    (entry["guid"],),
                ).fetchone()
                if already_stored is not None:
                    continue
                pub_date = entry["pub_date"]
                if isinstance(pub_date, datetime):
                    # Same text the deprecated default sqlite3 datetime
                    # adapter would store, without relying on it.
                    pub_date = pub_date.isoformat(" ")
                conn.execute(
                    "INSERT INTO entries (title,url,guid,tweeted,pub_date) values (?,?,?,?,?)",
                    (entry["title"], entry["url"], entry["guid"], False, pub_date),
                )
    finally:
        conn.close()
def choose_tweet(db, mark=False):
    """Return the not-yet-tweeted entry with the highest id.

    Args:
        db: Database path, passed unchanged to ``sqlite3.connect``.
        mark: When true, flag the returned entry as tweeted and commit.

    Returns:
        A ``(title, url, id)`` tuple, or ``None`` when every entry has
        already been tweeted.
    """
    conn = sqlite3.connect(db)
    try:
        tweet = conn.execute(
            "SELECT title, url, id FROM entries WHERE tweeted = 0 ORDER BY id DESC"
        ).fetchone()
        if mark and tweet is not None:
            conn.execute("UPDATE entries SET tweeted = 1 WHERE id = ?", (tweet[2],))
            conn.commit()
        return tweet
    finally:
        # Release the connection on every path, including errors.
        conn.close()