diff options
author | panda-roux <contact@panda-roux.pub> | 2021-08-22 11:02:16 -0700 |
---|---|---|
committer | panda-roux <contact@panda-roux.pub> | 2021-08-22 11:02:16 -0700 |
commit | ab5982443efd3969ae9b240be7f52e29fc8409bd (patch) | |
tree | d9ad4c01ae511341203fe884306b8479f27d6a99 |
adding files
-rw-r--r-- | .gitignore | 4 | ||||
-rw-r--r-- | CMakeLists.txt | 35 | ||||
-rw-r--r-- | README.md | 70 | ||||
-rw-r--r-- | example.conf | 73 | ||||
-rw-r--r-- | include/config.h | 39 | ||||
-rw-r--r-- | include/fetch.h | 22 | ||||
-rw-r--r-- | include/log.h | 25 | ||||
-rw-r--r-- | include/mail.h | 12 | ||||
-rw-r--r-- | include/parse.h | 26 | ||||
-rw-r--r-- | include/post.h | 25 | ||||
-rw-r--r-- | src/config.c | 179 | ||||
-rw-r--r-- | src/fetch.c | 219 | ||||
-rw-r--r-- | src/mail.c | 221 | ||||
-rw-r--r-- | src/main.c | 63 | ||||
-rw-r--r-- | src/parse.c | 231 | ||||
-rw-r--r-- | src/post.c | 19 |
16 files changed, 1263 insertions, 0 deletions
diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..4543fbf --- /dev/null +++ b/.gitignore | |||
@@ -0,0 +1,4 @@ | |||
1 | build/ | ||
2 | compile_commands.json | ||
3 | .cache/ | ||
4 | .clang-tidy | ||
diff --git a/CMakeLists.txt b/CMakeLists.txt new file mode 100644 index 0000000..b35984b --- /dev/null +++ b/CMakeLists.txt | |||
@@ -0,0 +1,35 @@ | |||
1 | cmake_minimum_required(VERSION 3.17.0) | ||
2 | |||
3 | project(rssmail VERSION 1.0.0 LANGUAGES C) | ||
4 | |||
5 | set(CMAKE_EXPORT_COMPILE_COMMANDS ON) | ||
6 | |||
7 | find_package(LibXml2 REQUIRED) | ||
8 | find_package(CURL REQUIRED) | ||
9 | |||
10 | add_executable(rssmail | ||
11 | src/main.c | ||
12 | src/fetch.c | ||
13 | src/parse.c | ||
14 | src/mail.c | ||
15 | src/config.c | ||
16 | src/post.c) | ||
17 | |||
18 | if(CMAKE_BUILD_TYPE STREQUAL "Debug") | ||
19 | target_compile_options(rssmail PRIVATE -ggdb3 -fsanitize=address -fsanitize=undefined) | ||
20 | target_link_options(rssmail PRIVATE -no-pie -fsanitize=address -fsanitize=undefined) | ||
21 | target_compile_definitions(rssmail PRIVATE DEBUG RSSMAIL_SEND_TO_FILE) | ||
22 | endif() | ||
23 | |||
24 | # disabling logging may reduce binary size | ||
25 | # target_compile_definitions(rssmail PRIVATE RSSMAIL_DISABLE_LOGGING) | ||
26 | |||
27 | target_link_libraries(rssmail | ||
28 | PRIVATE | ||
29 | ${LIBXML2_LIBRARIES} | ||
30 | ${CURL_LIBRARIES}) | ||
31 | target_include_directories(rssmail | ||
32 | PRIVATE | ||
33 | "${CMAKE_CURRENT_SOURCE_DIR}/include" | ||
34 | ${LIBXML2_INCLUDE_DIRS} | ||
35 | ${CURL_INCLUDE_DIRS}) | ||
diff --git a/README.md b/README.md new file mode 100644 index 0000000..453eeed --- /dev/null +++ b/README.md | |||
@@ -0,0 +1,70 @@ | |||
1 | # rssmail | ||
2 | |||
3 | An RSS-to-email aggregator tool. | ||
4 | |||
5 | The aim of this tool is to be simple to configure and run. | ||
6 | |||
7 | ## Usage | ||
8 | ``` | ||
9 | ./rssmail <first.conf> [second.conf ...] | ||
10 | ``` | ||
11 | |||
12 | See example.conf for available configuration options and aliases. | ||
13 | |||
14 | Configuration can be split across multiple files. A use-case for this might be one in which you'd like to send different digests to different sets of recipients using a common set of SMTP parameters: | ||
15 | |||
16 | ``` | ||
17 | ./rssmail server.conf cat-feeds.conf cat-lovers.conf | ||
18 | ./rssmail server.conf mushroom-feeds.conf shroom-lovers.conf | ||
19 | |||
20 | |||
21 | server.conf: | ||
22 | |||
23 | username admin@example.com | ||
24 | password hatemyjob | ||
25 | host mail.example.com | ||
26 | port 465 | ||
27 | from mailer@example.com | ||
28 | |||
29 | |||
30 | cat-feeds.conf: | ||
31 | |||
32 | max_age 1440 | ||
33 | uri https://cats.example.com/rss.xml | ||
34 | uri https://kittens.example.com/rss | ||
35 | uri https://tabbies.example.com/feed | ||
36 | ... | ||
37 | |||
38 | |||
39 | mushroom-feeds.conf: | ||
40 | |||
41 | max_age 1440 | ||
42 | uri https://mush.example.com/rss | ||
43 | uri https://mycelium.example.com/feed&type=rss | ||
44 | ... | ||
45 | |||
46 | |||
47 | cat-lovers.conf: | ||
48 | |||
49 | to cats-list@example.com | ||
50 | recipient alice@example.com | ||
51 | recipient bob@example.com | ||
52 | ... | ||
53 | |||
54 | |||
55 | shroom-lovers.conf: | ||
56 | |||
57 | to myco-list@example.com | ||
58 | recipient jake@example.com | ||
59 | recipient balthazar@example.com | ||
60 | recipient alice@example.com | ||
61 | ... | ||
62 | ``` | ||
63 | |||
64 | ## Runtime Dependencies | ||
65 | - LibXML2 | ||
66 | - LibCURL | ||
67 | |||
68 | ## Build Dependencies | ||
69 | - GCC or Clang | ||
70 | - CMake | ||
diff --git a/example.conf b/example.conf new file mode 100644 index 0000000..1df4d83 --- /dev/null +++ b/example.conf | |||
@@ -0,0 +1,73 @@ | |||
1 | # -------------------------------------------------- | ||
2 | # Notice | ||
3 | # -------------------------------------------------- | ||
4 | # | ||
5 | # Configuration settings may be spread across multiple files. | ||
6 | # | ||
7 | # This allows you to, for example, have files with different lists of | ||
8 | # recipients, or one list of recipients that receives digests from | ||
9 | # varied sources. | ||
10 | # | ||
11 | # The order of the file paths passed as arguments doesn't matter; | ||
12 | # the application treats them as though they were concatenated | ||
13 | # files. | ||
14 | # | ||
15 | # Example: | ||
16 | # | ||
17 | # ./rssmail recipients.conf primary.conf | ||
18 | # ./rssmail secondary.conf recipients.conf extra_recipients.conf | ||
19 | |||
20 | |||
21 | # -------------------------------------------------- | ||
22 | # SMTP configuration | ||
23 | # -------------------------------------------------- | ||
24 | |||
25 | # server information | ||
26 | username sender@example.com | ||
27 | password hunter2 | ||
28 | host mail.example.com | ||
29 | port 465 | ||
30 | |||
31 | # outgoing mail headers | ||
32 | from sender@example.com | ||
33 | to list@example.com | ||
34 | subject RSS Mailing List | ||
35 | |||
36 | |||
37 | # -------------------------------------------------- | ||
38 | # RSS feed configuration | ||
39 | # -------------------------------------------------- | ||
40 | |||
41 | useragent rssmail by panda-roux (https://sr.ht/~panda-roux/rssmail/) | ||
42 | |||
43 | # feeds | ||
44 | feed_uri https://itsgoingdown.org/feed | ||
45 | feed_uri https://crimethinc.com/rss | ||
46 | feed_uri https://www.revolutionbythebook.akpress.org/feed/ | ||
47 | feed_uri https://channelzeronetwork.com/rss | ||
48 | |||
49 | # maximum age of posts to include in the digest (minutes) | ||
50 | max_age 1440 | ||
51 | |||
52 | # recipients are listed on separate lines beginning with the "recipient" token | ||
53 | recipient contact@panda-roux.dev | ||
54 | recipient someone@example.com | ||
55 | recipient someone-else@example.com | ||
56 | |||
57 | |||
58 | # -------------------------------------------------- | ||
59 | # Miscellaneous | ||
60 | # -------------------------------------------------- | ||
61 | |||
62 | # aliases: | ||
63 | # - from, sender, from_address | ||
64 | # - to, to_address | ||
65 | # - subject, subj, sub | ||
66 | # - username, user | ||
67 | # - password, pass, passwd | ||
68 | # - host, server_host | ||
69 | # - port, server_port | ||
70 | # - recipient, rcpt | ||
71 | # - feed_uri, feed_url, uri, url, feed | ||
72 | # - useragent, agent | ||
73 | # - max_age, age | ||
diff --git a/include/config.h b/include/config.h new file mode 100644 index 0000000..3045878 --- /dev/null +++ b/include/config.h | |||
@@ -0,0 +1,39 @@ | |||
1 | #ifndef CONFIG_H | ||
2 | #define CONFIG_H | ||
3 | |||
4 | #include <stdbool.h> | ||
5 | |||
6 | #define RSSMAIL_CONFIGURE_SUCCESS 0 | ||
7 | #define RSSMAIL_CONFIGURE_FAILURE -1 | ||
8 | |||
/*
 * Settings used to deliver the digest over SMTP.
 *
 * The strings and the recipient list are heap-allocated while configuration
 * files are read and are released by free_smtp_config().
 */
typedef struct smtp_config_t {
  int recipient_count;  // number of entries in `recipients`
  int port;             // SMTP server port
  char* from;           // "From" header and envelope sender
  char* to;             // "To" header
  char* subject;        // subject prefix; the post count is appended to it
  char* username;       // SMTP login name
  char* password;       // SMTP login password
  char* host;           // SMTP server host name
  char* cert_path;      // optional CA bundle path, NULL when not configured
  char** recipients;    // envelope recipients, `recipient_count` entries
} smtp_config_t;
21 | |||
/*
 * Settings that control which feeds are fetched and how.
 *
 * `uris` and `useragent` are heap-allocated and released by
 * free_rss_config().
 */
typedef struct rss_config_t {
  int max_age;      // maximum age of posts to include, in minutes
  int uri_count;    // number of entries in `uris`
  char** uris;      // feed locations, `uri_count` entries
  char* useragent;  // User-Agent string sent with feed requests
} rss_config_t;
28 | |||
29 | int read_config_file(const char* path, smtp_config_t* s, rss_config_t* r); | ||
30 | |||
31 | void init_config(smtp_config_t* s, rss_config_t* r); | ||
32 | |||
33 | void generate_config_file(void); | ||
34 | |||
35 | void free_smtp_config(smtp_config_t* smtp); | ||
36 | |||
37 | void free_rss_config(rss_config_t* rss); | ||
38 | |||
39 | #endif | ||
diff --git a/include/fetch.h b/include/fetch.h new file mode 100644 index 0000000..fc2fa43 --- /dev/null +++ b/include/fetch.h | |||
@@ -0,0 +1,22 @@ | |||
1 | #ifndef FETCH_H | ||
2 | #define FETCH_H | ||
3 | |||
4 | #include "config.h" | ||
5 | #include "post.h" | ||
6 | |||
7 | /* | ||
8 | * Fetches an RSS document and converts into a list of post_item_t structures | ||
9 | */ | ||
10 | post_item_t* fetch_posts(rss_config_t* config, int* count); | ||
11 | |||
12 | /* | ||
13 | * Initializes global resources required for fetching data from URLs | ||
14 | */ | ||
15 | void init_fetch(void); | ||
16 | |||
17 | /* | ||
18 | * Cleans up global resources used for fetching data from URLs | ||
19 | */ | ||
20 | void cleanup_fetch(void); | ||
21 | |||
22 | #endif | ||
diff --git a/include/log.h b/include/log.h new file mode 100644 index 0000000..04ceca4 --- /dev/null +++ b/include/log.h | |||
@@ -0,0 +1,25 @@ | |||
#ifndef LOG_H
#define LOG_H

/*
 * Minimal logging macros.
 *
 * LOG writes to stdout, LOG_ERROR writes to stderr and LOG_DEBUG writes to
 * stdout only when DEBUG is defined.  Each macro appends a newline to the
 * format, so `fmt` must be a string literal.
 *
 * Defining RSSMAIL_DISABLE_LOGGING turns all three into no-ops that only
 * evaluate (and discard) the format string.
 */

#ifndef RSSMAIL_DISABLE_LOGGING

// printf/fprintf/stderr are used by the macros below, so this header must
// pull in <stdio.h> itself rather than rely on the including file.
#include <stdio.h>
// Kept for includers that pick up malloc/free/EXIT_FAILURE through this
// header.
#include <stdlib.h>

#define LOG(fmt, ...) printf(fmt "\n", ##__VA_ARGS__)
#define LOG_ERROR(fmt, ...) fprintf(stderr, fmt "\n", ##__VA_ARGS__)

#ifdef DEBUG
#define LOG_DEBUG(fmt, ...) printf("DEBUG: " fmt "\n", ##__VA_ARGS__)
#else
#define LOG_DEBUG(fmt, ...) (void)(fmt)
#endif

#else

#define LOG(fmt, ...) (void)(fmt)
#define LOG_ERROR(fmt, ...) (void)(fmt)
#define LOG_DEBUG(fmt, ...) (void)(fmt)

#endif

#endif
diff --git a/include/mail.h b/include/mail.h new file mode 100644 index 0000000..c54a153 --- /dev/null +++ b/include/mail.h | |||
@@ -0,0 +1,12 @@ | |||
1 | #ifndef MAIL_H | ||
2 | #define MAIL_H | ||
3 | |||
4 | #include "config.h" | ||
5 | #include "post.h" | ||
6 | |||
7 | #define RSSMAIL_SEND_SUCCESS 0 | ||
8 | #define RSSMAIL_SEND_FAILURE -1 | ||
9 | |||
10 | int send_posts_as_digest(smtp_config_t* config, post_item_t* posts, int count); | ||
11 | |||
12 | #endif | ||
diff --git a/include/parse.h b/include/parse.h new file mode 100644 index 0000000..5bbad27 --- /dev/null +++ b/include/parse.h | |||
@@ -0,0 +1,26 @@ | |||
1 | #ifndef PARSE_H | ||
2 | #define PARSE_H | ||
3 | |||
4 | #include <libxml/tree.h> | ||
5 | |||
6 | #include "config.h" | ||
7 | #include "post.h" | ||
8 | |||
9 | /* | ||
10 | * Reads the contents of the provided XML document and uses the information it | ||
11 | * contains in order to construct zero or more syndicated post objects | ||
12 | */ | ||
13 | post_item_t* parse_posts_from_doc(xmlDocPtr doc, int* count, | ||
14 | rss_config_t* config); | ||
15 | |||
16 | /* | ||
17 | * Initializes the global resources used by LibXML | ||
18 | */ | ||
19 | void init_parse(void); | ||
20 | |||
21 | /* | ||
22 | * Cleans up the global resources used by LibXML | ||
23 | */ | ||
24 | void cleanup_parse(void); | ||
25 | |||
26 | #endif | ||
diff --git a/include/post.h b/include/post.h new file mode 100644 index 0000000..82c4ee5 --- /dev/null +++ b/include/post.h | |||
@@ -0,0 +1,25 @@ | |||
1 | #ifndef POST_H | ||
2 | #define POST_H | ||
3 | |||
/*
 * One syndicated post entry as received from a remote RSS feed.
 *
 * The strings are released by free_post_item().
 */
typedef struct post_item_t {
  char* title;        // post title
  char* description;  // post description text
  char* url;          // link to the post
} post_item_t;
12 | |||
13 | /* | ||
14 | * Frees the resources allocated for each of the fields in post_item_t | ||
15 | * | ||
16 | * Note that this WILL NOT free the provided pointer itself, only its | ||
17 | * constituent parts | ||
18 | */ | ||
19 | void free_post_item(post_item_t* item); | ||
20 | |||
21 | #ifndef RSSMAIL_POST_MAX_FIELD_LENGTH | ||
22 | #define RSSMAIL_POST_MAX_FIELD_LENGTH 2048 | ||
23 | #endif | ||
24 | |||
25 | #endif | ||
diff --git a/src/config.c b/src/config.c new file mode 100644 index 0000000..cf0e835 --- /dev/null +++ b/src/config.c | |||
@@ -0,0 +1,179 @@ | |||
1 | #include "config.h" | ||
2 | |||
3 | #ifdef __STDC_ALLOC_LIB__ | ||
4 | #define __STDC_WANT_LIB_EXT2__ 1 | ||
5 | #endif | ||
6 | |||
7 | #include <ctype.h> | ||
8 | #include <post.h> | ||
9 | #include <stdio.h> | ||
10 | #include <stdlib.h> | ||
11 | #include <string.h> | ||
12 | |||
13 | #include "log.h" | ||
14 | |||
15 | #define SMTP_FROM "from_address", "sender", "from" | ||
16 | #define SMTP_TO "to_address", "to" | ||
17 | #define SMTP_SUBJECT "subject", "subj", "sub" | ||
18 | #define SMTP_USERNAME "username", "user" | ||
19 | #define SMTP_PASSWORD "password", "pass", "passwd" | ||
20 | #define SMTP_HOST "server_host", "host" | ||
21 | #define SMTP_CERTPATH "certificate_path", "cert", "cert_path" | ||
22 | #define SMTP_PORT "server_port", "port" | ||
23 | #define SMTP_RECIPIENT "recipient", "rcpt" | ||
24 | |||
25 | #define RSS_URI "feed_uri", "feed_url", "feed", "uri", "url" | ||
26 | #define RSS_USERAGENT "useragent", "agent" | ||
27 | #define RSS_MAXAGE "max_age", "age" | ||
28 | |||
29 | #define COMMENT_TOKEN '#' | ||
30 | |||
/*
 * CFG_STR(line, fld, tokens...)
 *
 * If `line` starts with one of the token strings immediately followed by
 * whitespace, replaces the string field `fld` with a newly allocated copy of
 * the line's value and frees the previous value.  When no value can be
 * extracted the field is left untouched.  Wrapped in do/while(0) so it
 * behaves as a single statement.
 */
#define CFG_STR(line, fld, ...) \
  do { \
    const char* cfg_tokens[] = {__VA_ARGS__}; \
    const size_t cfg_count = sizeof(cfg_tokens) / sizeof(cfg_tokens[0]); \
    for (size_t cfg_i = 0; cfg_i < cfg_count; ++cfg_i) { \
      const size_t cfg_len = strlen(cfg_tokens[cfg_i]); \
      if (strncmp((line), cfg_tokens[cfg_i], cfg_len) == 0 && \
          isspace((unsigned char)(line)[cfg_len])) { \
        char* cfg_value = copy_line_value(line); \
        if (cfg_value != NULL) { \
          free(fld); \
          (fld) = cfg_value; \
          LOG_DEBUG("%s = %s", cfg_tokens[cfg_i], (fld)); \
        } \
        break; \
      } \
    } \
  } while (0)
46 | |||
/*
 * CFG_STRAPPEND(line, fld, n, tokens...)
 *
 * If `line` starts with one of the token strings immediately followed by
 * whitespace, appends a newly allocated copy of the line's value to the
 * string array `fld` and increments its element count `n`.  The array and
 * the count are left unchanged when the value is missing or memory runs out,
 * so the list never contains NULL entries.
 */
#define CFG_STRAPPEND(line, fld, n, ...) \
  do { \
    const char* cfg_tokens[] = {__VA_ARGS__}; \
    const size_t cfg_count = sizeof(cfg_tokens) / sizeof(cfg_tokens[0]); \
    for (size_t cfg_i = 0; cfg_i < cfg_count; ++cfg_i) { \
      const size_t cfg_len = strlen(cfg_tokens[cfg_i]); \
      if (strncmp((line), cfg_tokens[cfg_i], cfg_len) != 0 || \
          !isspace((unsigned char)(line)[cfg_len])) { \
        continue; \
      } \
      char* cfg_value = copy_line_value(line); \
      char** cfg_grown = \
          cfg_value == NULL \
              ? NULL \
              : realloc((fld), ((size_t)(n) + 1) * sizeof(char*)); \
      if (cfg_grown == NULL) { \
        LOG_ERROR("Ignoring \"%s\" entry: missing value or out of memory", \
                  cfg_tokens[cfg_i]); \
        free(cfg_value); \
      } else { \
        cfg_grown[(n)] = cfg_value; \
        (fld) = cfg_grown; \
        ++(n); \
        LOG_DEBUG("%s (%d) = %s", cfg_tokens[cfg_i], (n), cfg_value); \
      } \
      break; \
    } \
  } while (0)
60 | |||
/*
 * CFG_INT(line, fld, tokens...)
 *
 * If `line` starts with one of the token strings immediately followed by
 * whitespace, parses the line's value as a base-10 integer and stores it in
 * `fld`.  A value that does not begin with a number is reported and leaves
 * `fld` unchanged.
 */
#define CFG_INT(line, fld, ...) \
  do { \
    const char* cfg_tokens[] = {__VA_ARGS__}; \
    const size_t cfg_count = sizeof(cfg_tokens) / sizeof(cfg_tokens[0]); \
    for (size_t cfg_i = 0; cfg_i < cfg_count; ++cfg_i) { \
      const size_t cfg_len = strlen(cfg_tokens[cfg_i]); \
      if (strncmp((line), cfg_tokens[cfg_i], cfg_len) == 0 && \
          isspace((unsigned char)(line)[cfg_len])) { \
        char* cfg_text = copy_line_value(line); \
        if (cfg_text != NULL) { \
          char* cfg_end = NULL; \
          long cfg_number = strtol(cfg_text, &cfg_end, 10); \
          if (cfg_end != cfg_text) { \
            (fld) = (int)cfg_number; \
            LOG_DEBUG("%s = %d", cfg_tokens[cfg_i], (fld)); \
          } else { \
            LOG_ERROR("Ignoring non-numeric value for \"%s\"", \
                      cfg_tokens[cfg_i]); \
          } \
          free(cfg_text); \
        } \
        break; \
      } \
    } \
  } while (0)
77 | |||
/*
 * Extracts the value part of a "key value" configuration line.
 *
 * The key is everything up to the first whitespace character.  The value is
 * whatever follows the run of whitespace after the key, with trailing
 * whitespace (including any line terminator) removed.  At most
 * RSSMAIL_POST_MAX_FIELD_LENGTH bytes of the value are considered.
 *
 * Returns a newly allocated, NUL-terminated string that the caller must
 * free(), or NULL when the line contains no whitespace at all or memory
 * could not be allocated.  A key followed only by whitespace yields "".
 */
static char* copy_line_value(char* line) {
  // skip over the key; any whitespace character ends it, not just ' '
  const char* value = line;
  while (*value != '\0' && !isspace((unsigned char)*value)) {
    ++value;
  }

  if (*value == '\0') {
    return NULL;
  }

  // skip the separator between key and value
  while (isspace((unsigned char)*value)) {
    ++value;
  }

  // measure the value without looking past the maximum field length
  const char* terminator = memchr(value, '\0', RSSMAIL_POST_MAX_FIELD_LENGTH);
  size_t length = terminator != NULL ? (size_t)(terminator - value)
                                     : (size_t)RSSMAIL_POST_MAX_FIELD_LENGTH;

  // trim the line terminator and other trailing whitespace; a final line
  // without a newline therefore keeps its last character
  while (length > 0 && isspace((unsigned char)value[length - 1])) {
    --length;
  }

  char* copy = malloc(length + 1);
  if (copy == NULL) {
    return NULL;
  }

  memcpy(copy, value, length);
  copy[length] = '\0';
  return copy;
}
93 | |||
94 | // NOLINTNEXTLINE | ||
95 | static void parse_line(char* line, smtp_config_t* smtp, rss_config_t* rss) { | ||
96 | // check to see if the line starts with the comment char; if so, skip it | ||
97 | if (strchr(line, COMMENT_TOKEN) == line) { | ||
98 | return; | ||
99 | } | ||
100 | |||
101 | CFG_INT(line, smtp->port, SMTP_PORT); | ||
102 | CFG_STR(line, smtp->from, SMTP_FROM); | ||
103 | CFG_STR(line, smtp->to, SMTP_TO); | ||
104 | CFG_STR(line, smtp->subject, SMTP_SUBJECT); | ||
105 | CFG_STR(line, smtp->username, SMTP_USERNAME); | ||
106 | CFG_STR(line, smtp->password, SMTP_PASSWORD); | ||
107 | CFG_STR(line, smtp->host, SMTP_HOST); | ||
108 | CFG_STR(line, smtp->cert_path, SMTP_CERTPATH); | ||
109 | CFG_STRAPPEND(line, smtp->recipients, smtp->recipient_count, SMTP_RECIPIENT); | ||
110 | CFG_STRAPPEND(line, rss->uris, rss->uri_count, RSS_URI); | ||
111 | CFG_STR(line, rss->useragent, RSS_USERAGENT); | ||
112 | CFG_INT(line, rss->max_age, RSS_MAXAGE); | ||
113 | } | ||
114 | |||
115 | int read_config_file(const char* path, smtp_config_t* s, rss_config_t* r) { | ||
116 | FILE* file; | ||
117 | if ((file = fopen(path, "r")) == NULL) { | ||
118 | perror("Failed to open configuration file"); | ||
119 | return RSSMAIL_CONFIGURE_FAILURE; | ||
120 | } | ||
121 | |||
122 | char* line = NULL; | ||
123 | size_t len = 0; | ||
124 | while (getline(&line, &len, file) > 0) { | ||
125 | if (line != NULL) { | ||
126 | parse_line(line, s, r); | ||
127 | } | ||
128 | } | ||
129 | |||
130 | free(line); | ||
131 | |||
132 | fclose(file); | ||
133 | |||
134 | return RSSMAIL_CONFIGURE_SUCCESS; | ||
135 | } | ||
136 | |||
137 | void init_config(smtp_config_t* s, rss_config_t* r) { | ||
138 | s->recipient_count = 0; | ||
139 | s->port = 0; | ||
140 | s->from = NULL; | ||
141 | s->to = NULL; | ||
142 | s->subject = NULL; | ||
143 | s->username = NULL; | ||
144 | s->password = NULL; | ||
145 | s->host = NULL; | ||
146 | s->cert_path = NULL; | ||
147 | s->recipients = NULL; | ||
148 | |||
149 | r->uris = NULL; | ||
150 | r->uri_count = 0; | ||
151 | r->useragent = NULL; | ||
152 | } | ||
153 | |||
154 | void free_smtp_config(smtp_config_t* s) { | ||
155 | free(s->from); | ||
156 | free(s->to); | ||
157 | free(s->username); | ||
158 | free(s->password); | ||
159 | free(s->host); | ||
160 | free(s->cert_path); | ||
161 | free(s->subject); | ||
162 | |||
163 | for (int i = 0; i < s->recipient_count; ++i) { | ||
164 | free(s->recipients[i]); | ||
165 | } | ||
166 | |||
167 | free(s->recipients); | ||
168 | } | ||
169 | |||
170 | void free_rss_config(rss_config_t* r) { | ||
171 | free(r->useragent); | ||
172 | |||
173 | for (int i = 0; i < r->uri_count; ++i) { | ||
174 | free(r->uris[i]); | ||
175 | } | ||
176 | |||
177 | free(r->uris); | ||
178 | } | ||
179 | |||
diff --git a/src/fetch.c b/src/fetch.c new file mode 100644 index 0000000..3b156af --- /dev/null +++ b/src/fetch.c | |||
@@ -0,0 +1,219 @@ | |||
1 | #include "fetch.h" | ||
2 | |||
3 | #include <curl/curl.h> | ||
4 | #include <curl/multi.h> | ||
5 | #include <libxml/parser.h> | ||
6 | #include <string.h> | ||
7 | |||
8 | #include "log.h" | ||
9 | #include "parse.h" | ||
10 | |||
11 | #define REQUEST_TIMEOUT_MS (30 * 1000) | ||
12 | |||
13 | static size_t write_data(void* data, size_t size, size_t count, void* file) { | ||
14 | // as data comes in, push it directly into the XML parser | ||
15 | size_t length = size * count; | ||
16 | xmlParserCtxtPtr* parser = (xmlParserCtxtPtr*)file; | ||
17 | if (*parser == NULL) { | ||
18 | LOG_DEBUG("Initializing a document parser"); | ||
19 | |||
20 | // if this is the first time this is being called, create the parser | ||
21 | *parser = | ||
22 | xmlCreatePushParserCtxt(NULL, NULL, (char*)data, (int)length, NULL); | ||
23 | } else { | ||
24 | xmlParseChunk(*parser, (char*)data, (int)length, 0); | ||
25 | } | ||
26 | |||
27 | LOG_DEBUG("Received %zu bytes", count); | ||
28 | |||
29 | return length; | ||
30 | } | ||
31 | |||
32 | static void init_request(CURLM* curlm, rss_config_t* config, int uri_index) { | ||
33 | char* uri = config->uris[uri_index]; | ||
34 | |||
35 | CURL* curl = NULL; | ||
36 | if ((curl = curl_easy_init()) == NULL) { | ||
37 | LOG_ERROR("Failed to create CURL object for URI \"%s\"", uri); | ||
38 | return; | ||
39 | } | ||
40 | |||
41 | LOG_DEBUG("Fetching document from %s", uri); | ||
42 | |||
43 | xmlParserCtxtPtr* parser = malloc(sizeof(xmlParserCtxtPtr)); | ||
44 | *parser = NULL; | ||
45 | |||
46 | curl_easy_setopt(curl, CURLOPT_URL, uri); | ||
47 | curl_easy_setopt(curl, CURLOPT_NOPROGRESS, 1L); | ||
48 | curl_easy_setopt(curl, CURLOPT_WRITEFUNCTION, write_data); | ||
49 | curl_easy_setopt(curl, CURLOPT_WRITEDATA, parser); | ||
50 | curl_easy_setopt(curl, CURLOPT_PRIVATE, parser); | ||
51 | curl_easy_setopt(curl, CURLOPT_FOLLOWLOCATION, 1L); | ||
52 | curl_easy_setopt(curl, CURLOPT_USERAGENT, config->useragent); | ||
53 | |||
54 | #ifdef DEBUG | ||
55 | curl_easy_setopt(curl, CURLOPT_VERBOSE, 1L); | ||
56 | #endif | ||
57 | |||
58 | curl_multi_add_handle(curlm, curl); | ||
59 | } | ||
60 | |||
61 | static int parse_response_document(post_item_t** posts, xmlDocPtr doc, | ||
62 | rss_config_t* config, const char* url, | ||
63 | int total) { | ||
64 | int doc_post_count = 0; | ||
65 | post_item_t* doc_posts = parse_posts_from_doc(doc, &doc_post_count, config); | ||
66 | |||
67 | if (doc_posts != NULL) { | ||
68 | if (doc_post_count > 0) { | ||
69 | LOG_DEBUG("Found %d syndicated entries", doc_post_count); | ||
70 | post_item_t* expanded = | ||
71 | realloc(*posts, (total + doc_post_count) * sizeof(post_item_t)); | ||
72 | if (expanded == NULL) { | ||
73 | LOG_ERROR("Ran out of memory while fetching posts from %s", url); | ||
74 | return 0; | ||
75 | } | ||
76 | |||
77 | memcpy(&expanded[total], doc_posts, doc_post_count * sizeof(post_item_t)); | ||
78 | *posts = expanded; | ||
79 | } | ||
80 | |||
81 | // only free the temporary buffer, not the post items themselves; the | ||
82 | // string pointers on the post structure need to remain valid after this | ||
83 | // point | ||
84 | free(doc_posts); | ||
85 | } | ||
86 | |||
87 | return doc_post_count; | ||
88 | } | ||
89 | |||
90 | static void wait_until_finished(CURLM* curlm) { | ||
91 | int running = 0; | ||
92 | curl_multi_perform(curlm, &running); | ||
93 | |||
94 | do { | ||
95 | int numfds = 0; | ||
96 | int result = curl_multi_wait(curlm, NULL, 0, REQUEST_TIMEOUT_MS, &numfds); | ||
97 | if (result != CURLM_OK) { | ||
98 | LOG_ERROR("An error occurred while waiting for a request to complete: %s", | ||
99 | curl_multi_strerror(result)); | ||
100 | break; | ||
101 | } | ||
102 | |||
103 | curl_multi_perform(curlm, &running); | ||
104 | } while (running); | ||
105 | } | ||
106 | |||
107 | static bool ensure_response_success_code(CURL* curl) { | ||
108 | int status_code = 0; | ||
109 | curl_easy_getinfo(curl, CURLINFO_RESPONSE_CODE, &status_code); | ||
110 | |||
111 | if (status_code != 200) { | ||
112 | char* url = NULL; | ||
113 | curl_easy_getinfo(curl, CURLINFO_EFFECTIVE_URL, &url); | ||
114 | if (url != NULL) { | ||
115 | LOG_ERROR("Received HTTP error code %d from %s", status_code, url); | ||
116 | } | ||
117 | |||
118 | curl_easy_cleanup(curl); | ||
119 | false; | ||
120 | } | ||
121 | |||
122 | return true; | ||
123 | } | ||
124 | |||
125 | static bool ensure_curl_success(CURLMsg* msg, CURL* curl) { | ||
126 | if (msg->data.result != CURLE_OK) { | ||
127 | char* url = NULL; | ||
128 | curl_easy_getinfo(curl, CURLINFO_EFFECTIVE_URL, &url); | ||
129 | if (url != NULL) { | ||
130 | LOG_ERROR("Received CURL error code %d while fetching %s", | ||
131 | msg->data.result, url); | ||
132 | } else { | ||
133 | LOG_ERROR("Received CURL error code %ds", msg->data.result); | ||
134 | } | ||
135 | |||
136 | return false; | ||
137 | } | ||
138 | |||
139 | return true; | ||
140 | } | ||
141 | |||
142 | static void append_posts_from_parser(CURL* curl, int* total, | ||
143 | rss_config_t* config, | ||
144 | post_item_t** posts) { | ||
145 | xmlParserCtxtPtr* parser; | ||
146 | curl_easy_getinfo(curl, CURLINFO_PRIVATE, &parser); | ||
147 | if (parser != NULL) { | ||
148 | if (*parser != NULL) { | ||
149 | // terminate the parser | ||
150 | xmlParseChunk(*parser, NULL, 0, 1); | ||
151 | xmlDocPtr doc = (*parser)->myDoc; | ||
152 | xmlFreeParserCtxt(*parser); | ||
153 | |||
154 | char* url = NULL; | ||
155 | curl_easy_getinfo(curl, CURLINFO_EFFECTIVE_URL, &url); | ||
156 | *total += parse_response_document(posts, doc, config, url, *total); | ||
157 | |||
158 | xmlFreeDoc(doc); | ||
159 | } | ||
160 | |||
161 | free(parser); | ||
162 | } | ||
163 | } | ||
164 | |||
165 | post_item_t* fetch_posts(rss_config_t* config, int* count) { | ||
166 | post_item_t* posts = NULL; | ||
167 | *count = 0; | ||
168 | |||
169 | CURLM* curlm = curl_multi_init(); | ||
170 | |||
171 | for (int i = 0; i < config->uri_count; ++i) { | ||
172 | init_request(curlm, config, i); | ||
173 | } | ||
174 | |||
175 | wait_until_finished(curlm); | ||
176 | |||
177 | CURLMsg* msg; | ||
178 | int msgs_left = 0; | ||
179 | while ((msg = curl_multi_info_read(curlm, &msgs_left))) { | ||
180 | if (msg->msg != CURLMSG_DONE) { | ||
181 | // log error | ||
182 | continue; | ||
183 | } | ||
184 | |||
185 | CURL* curl = msg->easy_handle; | ||
186 | |||
187 | curl_multi_remove_handle(curlm, curl); | ||
188 | |||
189 | if (!ensure_curl_success(msg, curl)) { | ||
190 | continue; | ||
191 | } | ||
192 | |||
193 | if (!ensure_response_success_code(curl)) { | ||
194 | continue; | ||
195 | } | ||
196 | |||
197 | append_posts_from_parser(curl, count, config, &posts); | ||
198 | |||
199 | curl_easy_cleanup(curl); | ||
200 | } | ||
201 | |||
202 | curl_multi_cleanup(curlm); | ||
203 | |||
204 | return posts; | ||
205 | } | ||
206 | |||
207 | void init_fetch(void) { | ||
208 | LOG_DEBUG("Initializing CURL globally"); | ||
209 | |||
210 | curl_global_init(CURL_GLOBAL_DEFAULT); | ||
211 | } | ||
212 | |||
/*
 * Releases the global libcurl state set up by init_fetch().
 */
void cleanup_fetch(void) {
  LOG_DEBUG("Cleaning up CURL global resources");

  curl_global_cleanup();
}
diff --git a/src/mail.c b/src/mail.c new file mode 100644 index 0000000..f72a797 --- /dev/null +++ b/src/mail.c | |||
@@ -0,0 +1,221 @@ | |||
1 | #include "mail.h" | ||
2 | |||
3 | #include <curl/curl.h> | ||
4 | #include <stdio.h> | ||
5 | #include <string.h> | ||
6 | #include <time.h> | ||
7 | |||
8 | #include "log.h" | ||
9 | |||
10 | #define URI_MAX_LENGTH 255 | ||
11 | #define NEWLINE "\r\n" | ||
12 | #define HEADER_LINE_BUFFER_LENGTH 256 | ||
13 | |||
/*
 * Upload state for one outgoing message: the stream holding the complete
 * message text and how much of it has been handed to libcurl so far.
 */
typedef struct message_payload_t {
  FILE* file;           // stream containing headers and body
  size_t bytes_sent;    // number of bytes already passed to libcurl
  size_t total_length;  // total size of the message in bytes
} message_payload_t;
19 | |||
/*
 * Writes one "Name: value" header line, terminated by CRLF, to `file`.
 *
 * The line is formatted directly into the stream, so a long value is written
 * in full instead of being squeezed through a fixed-size buffer.  A NULL
 * value is written as an empty value.
 */
static void write_header_line(FILE* file, const char* name, char* value) {
  fprintf(file, "%s: %s" NEWLINE, name, value != NULL ? value : "");
}
26 | |||
/*
 * Writes two consecutive CRLF sequences to `file`.
 */
static void write_empty_line(FILE* file) {
  fputs(NEWLINE NEWLINE, file);
}
30 | |||
/*
 * Writes `contents` to `file`, limited to RSSMAIL_POST_MAX_FIELD_LENGTH
 * bytes.  No line terminator is added.  A NULL `contents` (a post field that
 * is absent) writes nothing.
 */
static void write_body_line(FILE* file, char* contents) {
  if (contents == NULL) {
    return;
  }

  // bounded length: never look past the maximum field length
  const char* terminator =
      memchr(contents, '\0', RSSMAIL_POST_MAX_FIELD_LENGTH);
  size_t len = terminator != NULL ? (size_t)(terminator - contents)
                                  : (size_t)RSSMAIL_POST_MAX_FIELD_LENGTH;

  fwrite(contents, sizeof(char), len, file);
}
35 | |||
36 | static void write_post_body(FILE* file, post_item_t* post) { | ||
37 | write_body_line(file, post->title); | ||
38 | write_empty_line(file); | ||
39 | write_body_line(file, post->description); | ||
40 | write_empty_line(file); | ||
41 | write_body_line(file, post->url); | ||
42 | } | ||
43 | |||
/*
 * Writes the introductory sentence stating how many posts the digest
 * contains, followed by a blank line.
 */
static void write_post_count(FILE* file, int count) {
  fprintf(file,
          "This message contains %d posts in a digest format assembled by "
          "RSSMail." NEWLINE NEWLINE,
          count);
}
53 | |||
/*
 * Writes the separator placed between two posts: a row of dashes with three
 * line breaks before and after it.
 */
static void write_divider(FILE* file) {
  fputs(NEWLINE NEWLINE NEWLINE "-----" NEWLINE NEWLINE NEWLINE, file);
}
58 | |||
59 | static void build_digest_body(FILE* file, post_item_t* posts, int count) { | ||
60 | write_post_count(file, count); | ||
61 | |||
62 | // write the first post (isn't followed by divider line) | ||
63 | write_post_body(file, &posts[0]); | ||
64 | |||
65 | // if there was only one post, we're done | ||
66 | if (count == 1) { | ||
67 | return; | ||
68 | } | ||
69 | |||
70 | // write the rest of the posts | ||
71 | for (int i = 1; i < count; ++i) { | ||
72 | write_divider(file); | ||
73 | write_post_body(file, &posts[i]); | ||
74 | } | ||
75 | } | ||
76 | |||
77 | static void build_digest_headers(FILE* file, smtp_config_t* config, | ||
78 | int post_count) { | ||
79 | // get a local timestamp | ||
80 | time_t ticks = time(NULL); | ||
81 | struct tm* now = localtime(&ticks); | ||
82 | char date_buffer[HEADER_LINE_BUFFER_LENGTH]; | ||
83 | strftime(&date_buffer[0], HEADER_LINE_BUFFER_LENGTH, "%c", now); | ||
84 | |||
85 | // write headers | ||
86 | write_header_line(file, "Date", &date_buffer[0]); | ||
87 | write_header_line(file, "To", config->to); | ||
88 | write_header_line(file, "From", config->from); | ||
89 | write_header_line(file, "MIME-Version", "1.0"); | ||
90 | write_header_line(file, "Content-Transfer-Encoding", "8bit"); | ||
91 | write_header_line(file, "Content-Type", "text/plain;charset=utf-8"); | ||
92 | |||
93 | char subject_buffer[HEADER_LINE_BUFFER_LENGTH]; | ||
94 | snprintf(&subject_buffer[0], HEADER_LINE_BUFFER_LENGTH, "%s (%d posts)", | ||
95 | config->subject, post_count); | ||
96 | |||
97 | write_header_line(file, "Subject", &subject_buffer[0]); | ||
98 | |||
99 | // terminate with an additional cr/lf | ||
100 | write_empty_line(file); | ||
101 | } | ||
102 | |||
103 | static FILE* build_digest_payload(post_item_t* posts, int count, size_t* length, | ||
104 | smtp_config_t* config) { | ||
105 | #ifdef RSSMAIL_SEND_TO_FILE | ||
106 | FILE* file = fopen("rssmail_output", "w+"); | ||
107 | #else | ||
108 | FILE* file = tmpfile(); | ||
109 | #endif | ||
110 | |||
111 | build_digest_headers(file, config, count); | ||
112 | build_digest_body(file, posts, count); | ||
113 | |||
114 | // add null-terminator | ||
115 | // fwrite("\0", sizeof(char), 1, file); | ||
116 | |||
117 | *length = (size_t)ftell(file); | ||
118 | |||
119 | LOG_DEBUG("Digest payload size: %zu bytes", *length); | ||
120 | |||
121 | fseek(file, 0, SEEK_SET); | ||
122 | return file; | ||
123 | } | ||
124 | |||
125 | static void free_digest_payload(message_payload_t* payload) { | ||
126 | if (payload != NULL) { | ||
127 | if (payload->file != NULL) { | ||
128 | fclose(payload->file); | ||
129 | } | ||
130 | |||
131 | free(payload); | ||
132 | } | ||
133 | } | ||
134 | |||
135 | static size_t upload_payload(char* ptr, size_t size, size_t count, | ||
136 | void* userdata) { | ||
137 | message_payload_t* payload = (message_payload_t*)userdata; | ||
138 | if (payload->bytes_sent >= payload->total_length || size * count == 0 || | ||
139 | feof(payload->file) != 0) { | ||
140 | LOG_DEBUG("Reached the end of the payload"); | ||
141 | return 0; | ||
142 | } | ||
143 | |||
144 | size_t bytes_to_send = count * size; | ||
145 | size_t bytes_remaining = payload->total_length - payload->bytes_sent; | ||
146 | if (bytes_to_send > bytes_remaining) { | ||
147 | bytes_to_send = bytes_remaining; | ||
148 | } | ||
149 | |||
150 | size_t bytes_sent = fread(ptr, 1, bytes_to_send, payload->file); | ||
151 | |||
152 | payload->bytes_sent += bytes_sent; | ||
153 | |||
154 | LOG_DEBUG("Sent %zu of %zu bytes to the mail server", payload->bytes_sent, | ||
155 | payload->total_length); | ||
156 | |||
157 | return bytes_sent; | ||
158 | } | ||
159 | |||
160 | int send_posts_as_digest(smtp_config_t* config, post_item_t* posts, int count) { | ||
161 | CURL* curl = NULL; | ||
162 | struct curl_slist* recipients = NULL; | ||
163 | message_payload_t* payload = NULL; | ||
164 | |||
165 | char smtp_uri[URI_MAX_LENGTH + 1]; | ||
166 | if (snprintf(&smtp_uri[0], URI_MAX_LENGTH, "smtps://%s:%d", config->host, | ||
167 | config->port) >= URI_MAX_LENGTH) { | ||
168 | LOG_ERROR( | ||
169 | "The provided SMTP URI is too long; the maximum allowed length is %d", | ||
170 | URI_MAX_LENGTH); | ||
171 | return RSSMAIL_SEND_FAILURE; | ||
172 | } | ||
173 | |||
174 | if ((curl = curl_easy_init()) == NULL) { | ||
175 | LOG_ERROR("Failed to create CURL prior to sending mail digest"); | ||
176 | return RSSMAIL_SEND_FAILURE; | ||
177 | } | ||
178 | |||
179 | for (int i = 0; i < config->recipient_count; ++i) { | ||
180 | recipients = curl_slist_append(recipients, config->recipients[i]); | ||
181 | } | ||
182 | |||
183 | payload = (message_payload_t*)malloc(sizeof(message_payload_t)); | ||
184 | payload->bytes_sent = 0; | ||
185 | payload->file = | ||
186 | build_digest_payload(posts, count, &payload->total_length, config); | ||
187 | |||
188 | curl_easy_setopt(curl, CURLOPT_MAIL_RCPT, recipients); | ||
189 | curl_easy_setopt(curl, CURLOPT_MAIL_FROM, config->from); | ||
190 | curl_easy_setopt(curl, CURLOPT_USERNAME, config->username); | ||
191 | curl_easy_setopt(curl, CURLOPT_PASSWORD, config->password); | ||
192 | curl_easy_setopt(curl, CURLOPT_URL, &smtp_uri[0]); | ||
193 | curl_easy_setopt(curl, CURLOPT_USE_SSL, (long)CURLUSESSL_ALL); | ||
194 | curl_easy_setopt(curl, CURLOPT_NOPROGRESS, 1L); | ||
195 | curl_easy_setopt(curl, CURLOPT_READFUNCTION, upload_payload); | ||
196 | curl_easy_setopt(curl, CURLOPT_READDATA, payload); | ||
197 | curl_easy_setopt(curl, CURLOPT_UPLOAD, 1L); | ||
198 | |||
199 | if (config->cert_path != NULL) { | ||
200 | curl_easy_setopt(curl, CURLOPT_CAINFO, config->cert_path); | ||
201 | } | ||
202 | |||
203 | #ifdef DEBUG | ||
204 | curl_easy_setopt(curl, CURLOPT_VERBOSE, 1L); | ||
205 | #endif | ||
206 | |||
207 | #ifndef RSSMAIL_SEND_TO_FILE | ||
208 | CURLcode result = curl_easy_perform(curl); | ||
209 | if (result != CURLE_OK) { | ||
210 | LOG_ERROR("Failed to send message: %s", curl_easy_strerror(result)); | ||
211 | } | ||
212 | #endif | ||
213 | |||
214 | free_digest_payload(payload); | ||
215 | |||
216 | curl_slist_free_all(recipients); | ||
217 | |||
218 | curl_easy_cleanup(curl); | ||
219 | |||
220 | return RSSMAIL_SEND_SUCCESS; | ||
221 | } | ||
diff --git a/src/main.c b/src/main.c new file mode 100644 index 0000000..228604e --- /dev/null +++ b/src/main.c | |||
@@ -0,0 +1,63 @@ | |||
1 | #include <assert.h> | ||
2 | |||
3 | #include "config.h" | ||
4 | #include "fetch.h" | ||
5 | #include "log.h" | ||
6 | #include "mail.h" | ||
7 | #include "parse.h" | ||
8 | |||
9 | int main(int argc, const char** argv) { | ||
10 | if (argc < 2) { | ||
11 | LOG("Error: Missing configuration file path argument\n"); | ||
12 | LOG("Usage: rssmail options.conf [other_options.conf, ...]"); | ||
13 | return EXIT_FAILURE; | ||
14 | } | ||
15 | |||
16 | // I am relying on this assumption in many places for the sake of convenience | ||
17 | static_assert(sizeof(xmlChar) == sizeof(char), | ||
18 | "xmlChar and char are not the same size!"); | ||
19 | |||
20 | // this initializes global libcurl and libxml resources | ||
21 | init_fetch(); | ||
22 | init_parse(); | ||
23 | |||
24 | // initialize configuration objects | ||
25 | smtp_config_t smtp_config; | ||
26 | rss_config_t rss_config; | ||
27 | init_config(&smtp_config, &rss_config); | ||
28 | |||
29 | // read each of the configuration files whose paths were passed in as | ||
30 | // command-line arguments | ||
31 | for (int i = 1; i < argc; ++i) { | ||
32 | if (read_config_file(argv[i], &smtp_config, &rss_config) != 0) { | ||
33 | LOG_ERROR("Failed to read configuration file %s", argv[1]); | ||
34 | goto done; | ||
35 | } | ||
36 | } | ||
37 | |||
38 | // download all the RSS posts from the configured URI | ||
39 | int count; | ||
40 | post_item_t* posts = fetch_posts(&rss_config, &count); | ||
41 | |||
42 | // if there are posts | ||
43 | if (posts != NULL && count > 0) { | ||
44 | send_posts_as_digest(&smtp_config, posts, count); | ||
45 | } | ||
46 | |||
47 | // cleanup | ||
48 | for (int i = 0; i < count; ++i) { | ||
49 | free_post_item(&posts[i]); | ||
50 | } | ||
51 | |||
52 | free(posts); | ||
53 | |||
54 | done: | ||
55 | free_smtp_config(&smtp_config); | ||
56 | free_rss_config(&rss_config); | ||
57 | |||
58 | cleanup_parse(); | ||
59 | cleanup_fetch(); | ||
60 | |||
61 | EXIT_SUCCESS; | ||
62 | } | ||
63 | |||
diff --git a/src/parse.c b/src/parse.c new file mode 100644 index 0000000..fe7b2b1 --- /dev/null +++ b/src/parse.c | |||
@@ -0,0 +1,231 @@ | |||
1 | #include "parse.h" | ||
2 | |||
3 | #include <curl/curl.h> | ||
4 | #include <libxml/HTMLparser.h> | ||
5 | #include <libxml/xpath.h> | ||
6 | #include <string.h> | ||
7 | #include <time.h> | ||
8 | |||
9 | #include "log.h" | ||
10 | |||
11 | static void extract_text_from_html(xmlChar* body, xmlChar** output) { | ||
12 | htmlDocPtr doc = NULL; | ||
13 | xmlXPathContextPtr xpath_ctx = NULL; | ||
14 | xmlXPathObjectPtr xpath_obj = NULL; | ||
15 | *output = NULL; | ||
16 | |||
17 | if (!(doc = htmlParseDoc(body, "UTF-8"))) { | ||
18 | return; | ||
19 | } | ||
20 | |||
21 | if (!(xpath_ctx = xmlXPathNewContext(doc))) { | ||
22 | goto done; | ||
23 | } | ||
24 | |||
25 | if (!(xpath_obj = | ||
26 | xmlXPathEvalExpression(BAD_CAST "//child::text()", xpath_ctx))) { | ||
27 | goto done; | ||
28 | } | ||
29 | |||
30 | size_t len = 0; | ||
31 | *output = calloc(RSSMAIL_POST_MAX_FIELD_LENGTH, sizeof(xmlChar)); | ||
32 | for (int i = 0; i < xpath_obj->nodesetval->nodeNr; ++i) { | ||
33 | xmlNodePtr node = xpath_obj->nodesetval->nodeTab[i]; | ||
34 | xmlChar* contents = xmlNodeGetContent(node); | ||
35 | if (contents != NULL) { | ||
36 | size_t contents_len = | ||
37 | strnlen((char*)contents, RSSMAIL_POST_MAX_FIELD_LENGTH - len); | ||
38 | if (contents_len + len < RSSMAIL_POST_MAX_FIELD_LENGTH) { | ||
39 | strncat(&((char*)(*output))[len], (char*)contents, contents_len); | ||
40 | } else { | ||
41 | LOG_DEBUG( | ||
42 | "Maximum field length (%d) was met while parsing HTML; text will " | ||
43 | "be " | ||
44 | "truncated", | ||
45 | RSSMAIL_POST_MAX_FIELD_LENGTH); | ||
46 | |||
47 | free(contents); | ||
48 | |||
49 | break; | ||
50 | } | ||
51 | |||
52 | free(contents); | ||
53 | } | ||
54 | } | ||
55 | |||
56 | done: | ||
57 | if (xpath_obj != NULL) { | ||
58 | xmlXPathFreeObject(xpath_obj); | ||
59 | } | ||
60 | |||
61 | if (xpath_ctx != NULL) { | ||
62 | xmlXPathFreeContext(xpath_ctx); | ||
63 | } | ||
64 | |||
65 | if (doc != NULL) { | ||
66 | xmlFreeDoc(doc); | ||
67 | } | ||
68 | } | ||
69 | |||
70 | static void extract_content_by_xpath(xmlXPathContextPtr xpath_ctx, | ||
71 | xmlNodePtr parent, const xmlChar* expr, | ||
72 | xmlChar** output) { | ||
73 | xmlXPathObjectPtr obj = xmlXPathNodeEval(parent, expr, xpath_ctx); | ||
74 | |||
75 | if (obj == NULL) { | ||
76 | LOG_ERROR("Failed to evaluate XPath expression \"%s\"", expr); | ||
77 | *output = NULL; | ||
78 | return; | ||
79 | } | ||
80 | |||
81 | *output = obj->nodesetval->nodeNr == 0 | ||
82 | ? NULL | ||
83 | : xmlNodeGetContent(obj->nodesetval->nodeTab[0]); | ||
84 | |||
85 | if (*output == NULL) { | ||
86 | LOG_DEBUG("Couldn't find any content matching xpath expression \"%s\"", | ||
87 | expr); | ||
88 | } | ||
89 | |||
90 | xmlXPathFreeObject(obj); | ||
91 | } | ||
92 | |||
93 | static void extract_parts_from_node(xmlXPathContextPtr xpath_ctx, | ||
94 | xmlNodePtr node, xmlChar** title, | ||
95 | xmlChar** desc, xmlChar** url) { | ||
96 | extract_content_by_xpath(xpath_ctx, node, BAD_CAST "title[1]", title); | ||
97 | |||
98 | extract_content_by_xpath(xpath_ctx, node, BAD_CAST "link[1]", url); | ||
99 | |||
100 | xmlChar* desc_body; | ||
101 | extract_content_by_xpath(xpath_ctx, node, | ||
102 | BAD_CAST "description/descendant-or-self::*/text()", | ||
103 | &desc_body); | ||
104 | |||
105 | if (desc_body != NULL) { | ||
106 | extract_text_from_html(desc_body, desc); | ||
107 | xmlFree(desc_body); | ||
108 | } else { | ||
109 | *desc = NULL; | ||
110 | } | ||
111 | |||
112 | // perform some debug logging | ||
113 | // (the rest of the function after this line should be optimized away on | ||
114 | // release builds) | ||
115 | |||
116 | if (title != NULL && *title != NULL) { | ||
117 | LOG_DEBUG("Title: %s", *title); | ||
118 | } else { | ||
119 | LOG_DEBUG("Missing title"); | ||
120 | } | ||
121 | |||
122 | if (desc != NULL && *desc != NULL) { | ||
123 | LOG_DEBUG("Description: %s", *desc); | ||
124 | } else { | ||
125 | LOG_DEBUG("Missing description"); | ||
126 | } | ||
127 | |||
128 | if (url != NULL && *url != NULL) { | ||
129 | LOG_DEBUG("URL: %s", *url); | ||
130 | } else { | ||
131 | LOG_DEBUG("Missing URL"); | ||
132 | } | ||
133 | } | ||
134 | |||
135 | static int get_node_published_age(xmlXPathContextPtr xpath_ctx, | ||
136 | xmlNodePtr node) { | ||
137 | xmlChar* text = NULL; | ||
138 | extract_content_by_xpath(xpath_ctx, node, BAD_CAST "pubDate[1]", &text); | ||
139 | if (text != NULL) { | ||
140 | time_t timestamp = curl_getdate((char*)text, NULL); | ||
141 | xmlFree(text); | ||
142 | return (int)(difftime(time(NULL), timestamp) / 60.0); | ||
143 | } | ||
144 | |||
145 | LOG_DEBUG( | ||
146 | "Couldn't determine the publication date of a post, so it will be " | ||
147 | "included"); | ||
148 | |||
149 | // couldn't get age, return 0 to guarantee inclusion | ||
150 | return 0; | ||
151 | } | ||
152 | |||
153 | post_item_t* parse_posts_from_doc(xmlDocPtr doc, int* count, | ||
154 | rss_config_t* config) { | ||
155 | xmlXPathContextPtr xpath_ctx = NULL; | ||
156 | xmlXPathObjectPtr xpath_obj = NULL; | ||
157 | post_item_t* buffer = NULL; | ||
158 | |||
159 | // create xpath context | ||
160 | if (!(xpath_ctx = xmlXPathNewContext(doc))) { | ||
161 | LOG_ERROR("Failed to create new XPath context prior to parsing post items"); | ||
162 | goto done; | ||
163 | } | ||
164 | |||
165 | if (!(xpath_obj = xmlXPathEvalExpression(BAD_CAST "//item", xpath_ctx))) { | ||
166 | LOG_ERROR("Failed to evaluate top-level 'item' xpath expression"); | ||
167 | goto done; | ||
168 | } | ||
169 | |||
170 | int node_count = xpath_obj->nodesetval->nodeNr; | ||
171 | int n = 0; | ||
172 | |||
173 | // for (int i = *count - 1; i >= 0; --i) { | ||
174 | for (int i = 0; i < node_count; ++i) { | ||
175 | LOG_DEBUG("Reading node %d of %d", i + 1, node_count); | ||
176 | |||
177 | xmlNodePtr node = xpath_obj->nodesetval->nodeTab[node_count - i - 1]; | ||
178 | |||
179 | // check age of the post | ||
180 | int age = get_node_published_age(xpath_ctx, node); | ||
181 | if (age > config->max_age) { | ||
182 | LOG_DEBUG( | ||
183 | "Skipping post with an age of %d minutes over maximum " | ||
184 | "allowed (age is %d minutes)", | ||
185 | age - config->max_age, age); | ||
186 | continue; | ||
187 | } | ||
188 | |||
189 | LOG_DEBUG("Post is of a permissible age (%d minutes)", age); | ||
190 | |||
191 | // if age is good, add the post to the buffer | ||
192 | post_item_t* expanded = realloc(buffer, (n + 1) * sizeof(post_item_t)); | ||
193 | if (expanded == NULL) { | ||
194 | LOG_ERROR( | ||
195 | "Ran out of memory attempting to extract the contents of feed " | ||
196 | "entries"); | ||
197 | break; | ||
198 | } | ||
199 | |||
200 | buffer = expanded; | ||
201 | |||
202 | post_item_t* item = &buffer[n++]; | ||
203 | extract_parts_from_node(xpath_ctx, node, (xmlChar**)&item->title, | ||
204 | (xmlChar**)&item->description, | ||
205 | (xmlChar**)&item->url); | ||
206 | } | ||
207 | |||
208 | *count = n; | ||
209 | |||
210 | done: | ||
211 | if (xpath_obj != NULL) { | ||
212 | xmlXPathFreeObject(xpath_obj); | ||
213 | } | ||
214 | |||
215 | if (xpath_ctx != NULL) { | ||
216 | xmlXPathFreeContext(xpath_ctx); | ||
217 | } | ||
218 | |||
219 | return buffer; | ||
220 | } | ||
221 | |||
222 | void init_parse(void) { | ||
223 | LOG_DEBUG("Initializing LibXML global resources"); | ||
224 | xmlInitParser(); | ||
225 | LIBXML_TEST_VERSION; | ||
226 | } | ||
227 | |||
/**
 * Counterpart of init_parse(): asks libxml2 to release the parser's global
 * state via xmlCleanupParser().
 */
void cleanup_parse(void) {
  LOG_DEBUG("Cleaning up LibXML global resources");

  xmlCleanupParser();
}
diff --git a/src/post.c b/src/post.c new file mode 100644 index 0000000..e2f9d97 --- /dev/null +++ b/src/post.c | |||
@@ -0,0 +1,19 @@ | |||
1 | #include "post.h" | ||
2 | |||
3 | #include <stdlib.h> | ||
4 | |||
5 | void free_post_item(post_item_t* item) { | ||
6 | if (item != NULL) { | ||
7 | if (item->title != NULL) { | ||
8 | free(item->title); | ||
9 | } | ||
10 | |||
11 | if (item->description != NULL) { | ||
12 | free(item->description); | ||
13 | } | ||
14 | |||
15 | if (item->url != NULL) { | ||
16 | free(item->url); | ||
17 | } | ||
18 | } | ||
19 | } | ||