修訂 | 49 (tree) |
---|---|
時間 | 2021-12-08 23:29:39 |
作者 | ![]() |
frozen ver 20211208 to tags
@@ -0,0 +1,53 @@ | ||
1 | +<?php | |
2 | + | |
3 | +// db dump screen | |
4 | + | |
5 | +include '../db.php'; | |
6 | + | |
// Load every entry (up, pending and down) together with the global counters.
$db = db_load('../ogupdb.dat', true, true, true);
$cnt = db_counters('../ogupdb.cnt');

// db_counters() returns FALSE when the counter file cannot be opened
if ($cnt === FALSE) {
  $dbtimestamp = '????-??-?? ??:??';
} else {
  $dbtimestamp = gmdate("Y-m-d H:i", $cnt['timestamp']);
}

echo "i\n";
echo "i >>> OGUP live database dump (as of {$dbtimestamp} UTC) <<<\n";
echo "i\n";
echo "7Apply a filter on hostname\t$\n";
echo "i\n";

// optional case-insensitive substring filter, supplied through the search item
$filter = '';
if (!empty($_SERVER['QUERY_STRING_SEARCH'])) {
  $filter = trim($_SERVER['QUERY_STRING_SEARCH']);
  echo "iResults are filtered to hostnames matching '{$filter}'\n";
  echo "i\n";
}

if ($db === FALSE) {
  // db_load() returns FALSE when the database file cannot be opened
  echo "3FAILED TO OPEN DB FILE\n";
} else {
  // the header cells are padded with the same widths as the data rows below
  echo 'i ### | ' . str_pad('HOSTNAME', 32) . '| ' . str_pad('STATUS', 22) . '| ' . str_pad('NEXT CHECK (UTC)', 16) . " | LAST WORKING IP \n";
  echo "i-----+---------------------------------+-----------------------+------------------+-----------------\n";
  $i = 1;
  foreach ($db as $item) {
    // the port is only shown when it differs from the gopher default (70)
    $host = $item['host'];
    if ($item['port'] != 70) $host .= ':' . $item['port'];
    if ((!empty($filter)) && (stripos($host, $filter) === FALSE)) continue;

    $host = str_pad($host . ' ', 32, '.', STR_PAD_RIGHT);

    // failedsince: 0 = up, 1 = pending verification, otherwise time of failure
    if ($item['failedsince'] == 0) {
      $status = 'OK';
    } else if ($item['failedsince'] == 1) {
      $status = 'pending verification';
    } else {
      $status = 'down ' . gmdate("Y-m-d H:i", $item['failedsince']);
    }

    // nextcheck: 0 means "no date scheduled yet"
    if ($item['nextcheck'] == 0) {
      $nextcheck = "ASAP";
    } else {
      $nextcheck = gmdate("Y-m-d H:i", $item['nextcheck']);
    }

    $status = str_pad($status, 22);
    $nextcheck = str_pad($nextcheck, 16);
    $id = str_pad($i, 4, ' ', STR_PAD_LEFT);
    echo "i{$id} | {$host}| {$status}| {$nextcheck} | {$item['ipaddr']}\n";
    $i++;
  }

  echo "i-----+---------------------------------+-----------------------+------------------+-----------------\n";
}
52 | + | |
53 | +?> |
@@ -0,0 +1,83 @@ | ||
1 | +<?php | |
2 | + | |
3 | +// | |
4 | +// down servers menu | |
5 | +// | |
6 | +// Copyright (C) 2019-2021 Mateusz Viste | |
7 | +// | |
8 | + | |
9 | +include '../db.php'; | |
10 | + | |
// Formats "<count> <name>" followed by a trailing space, pluralising the unit
// with an 's' when count exceeds 1. Yields an empty string when count is not
// positive, so that unused units simply vanish from the output.
function gen_unit($count, $name) {
  if (!($count > 0)) return('');
  $plural = ($count > 1) ? 's' : '';
  return("{$count} {$name}{$plural} ");
}
20 | + | |
// Converts a duration in seconds into a short human-readable string made of
// at most two units (e.g. "2 weeks 3 days", "4 hours 5 mins", "12 seconds").
// Durations of 10 weeks or more are reported in weeks only.
function time_diff_human($sec) {
  // unit lengths in seconds, largest first
  $lengths = array('week' => 604800, 'day' => 86400, 'hour' => 3600, 'minute' => 60);
  $qty = array();
  foreach ($lengths as $unit => $len) {
    // whole units contained in what is left (0 when less than one unit remains)
    $qty[$unit] = ($sec >= $len) ? (int)floor($sec / $len) : 0;
    $sec -= $qty[$unit] * $len;
  }

  if ($qty['week'] > 0) {
    $text = gen_unit($qty['week'], 'week');
    if ($qty['week'] < 10) $text .= gen_unit($qty['day'], 'day');
  } else if ($qty['day'] > 0) {
    $text = gen_unit($qty['day'], 'day') . gen_unit($qty['hour'], 'hour');
  } else if ($qty['hour'] > 0) {
    $text = gen_unit($qty['hour'], 'hour') . gen_unit($qty['minute'], 'min');
  } else if ($qty['minute'] > 0) {
    $text = gen_unit($qty['minute'], 'minute');
  } else {
    $text = gen_unit($sec, 'second');
  }

  // gen_unit() leaves a trailing space behind
  return(trim($text));
}
47 | + | |
48 | + | |
49 | +// custom sort of db items: failedsince-freshest first | |
// usort() comparator ordering db items by descending 'failedsince', ie. the
// most recent failure comes first.
function mysortfunc($a, $b) {
  $left = $a['failedsince'];
  $right = $b['failedsince'];
  if ($left < $right) {
    return(1);
  } else if ($left > $right) {
    return(-1);
  }
  return(0);
}
55 | + | |
56 | + | |
echo "i\n";
echo "iThis page lists gopherspace nodes that have been observed by the OGUP\n";
echo "ias down recently. All timestamps are given in terran UTC time.\n";
echo "i\n";

// only entries that are currently down (failedsince > 1) are of interest here
$db = db_load('../ogupdb.dat', false, false, true);

if ($db === FALSE) {
  // db_load() returns FALSE when the database file cannot be opened
  echo "3FAILED TO OPEN DB FILE\n";
} else {
  // the callback must be given as a string: a bare mysortfunc is an undefined
  // constant, which is a fatal error since PHP 8
  usort($db, 'mysortfunc');

  if (count($db) > 0) {
    // header padded like the rows below (32-column host, one space, duration)
    echo 'i' . str_pad('GOPHER NODE', 32) . " DOWN SINCE\n";
  } else {
    echo "iNothing to display. All known gopher servers appear to be healthy.\n";
  }

  foreach ($db as $item) {
    $host = str_pad($item['host'] . ' ', 32, '.', STR_PAD_RIGHT);
    $since = time_diff_human(time() - $item['failedsince']);
    $sincedate = gmdate("Y-m-d H:i", $item['failedsince']);
    $hostdownstr = str_pad($since, 16);
    echo "i{$host} {$hostdownstr} ({$sincedate})\n";
  }
}

echo "i\n";
echo "i---------------------------------------------------------- [EOF] ---\n";
82 | + | |
83 | +?> |
@@ -0,0 +1,49 @@ | ||
1 | +<?php | |
2 | + | |
3 | +// | |
4 | +// OGUP servers list gophermap | |
5 | +// | |
6 | +// Copyright (C) 2019-2021 Mateusz Viste | |
7 | +// | |
8 | + | |
9 | + | |
10 | +include '../db.php'; | |
11 | + | |
$cnt = db_counters('../ogupdb.cnt');

// db_counters() returns FALSE when the counter file is unusable; the summary
// is skipped in that case instead of printing empty figures
if ($cnt !== FALSE) {
  $cnt_tot = str_pad($cnt['total'], 6, ' ', STR_PAD_LEFT);
  $cnt_val = str_pad($cnt['up'], 6, ' ', STR_PAD_LEFT);
  $cnt_unk = str_pad($cnt['pending'], 6, ' ', STR_PAD_LEFT);
  $cnt_dwn = str_pad($cnt['down'], 6, ' ', STR_PAD_LEFT);

  echo "i\n";
  echo "iThe Observable Gopherspace Universe Project knows about:\n";
  echo "i{$cnt_tot} servers total, of which:\n";
  echo "i{$cnt_val} have been validated as operational\n";
  echo "i{$cnt_dwn} are down\n";
  echo "i{$cnt_unk} are pending validation\n";
  echo "i\n";
  echo "iLast update: " . gmdate("Y-m-d H:i", $cnt['timestamp']) . " UTC\n";
  echo "i\n";
}

// list of servers (only those validated as up)
$db = db_load('../ogupdb.dat', true, false, false);

// strict comparison: an empty (but readable) database is not an error
if ($db === FALSE) {
  echo "3FAILED TO OPEN DB FILE\n";
} else {
  foreach ($db as $item) {
    // the port is only shown when it differs from the gopher default (70)
    if ($item['port'] != 70) {
      $nicename = $item['host'] . ':' . $item['port'];
    } else {
      $nicename = $item['host'];
    }
    echo "1{$nicename}\t\t{$item['host']}\t{$item['port']}\n";
  }
  // no fclose() here: db_load() opens and closes the database file itself
}

echo "i\n";
echo "i----------------------------- [EOF] ---\n";
48 | + | |
49 | +?> |
@@ -0,0 +1,22 @@ | ||
1 | +<?php | |
2 | + | |
3 | +// | |
4 | +// a few random servers | |
5 | +// | |
6 | +// Copyright (C) 2019-2021 Mateusz Viste | |
7 | +// | |
8 | + | |
9 | +include '../db.php'; | |
10 | + | |
// Print up to 5 randomly chosen servers that are currently known to be up.
$db = db_load('../ogupdb.dat', true, false, false);

// db_load() returns FALSE when the database cannot be opened: print nothing
if (is_array($db)) {
  shuffle($db);

  // array_slice() copes with databases holding fewer than 5 entries
  foreach (array_slice($db, 0, 5) as $item) {
    $host = $item['host'];
    $port = $item['port'];
    $nice = $host;
    if ($port != 70) $nice .= ':' . $port;
    echo "1{$nice}\t\t{$host}\t{$port}\n";
  }
}
21 | + | |
22 | +?> |
@@ -0,0 +1,59 @@ | ||
1 | +<?php | |
2 | + | |
// Suggestion form: without a query the explanation screen is shown, with a
// query the submitted server name is validated and stored as a file in the
// /tmp/ogup/ spool directory.

$spooldir = '/tmp/ogup';

$serv = '';
if (!empty($_SERVER['QUERY_STRING_SEARCH'])) {
  $serv = strtolower(trim($_SERVER['QUERY_STRING_SEARCH']));
}

// no query? output default screen

if (empty($serv)) {
  echo "i\r\n";
  echo "iThe Observable Gopherspace Universe Project actively scans the universe\r\n";
  echo "ilooking for new gopher nodes, but it still may miss some. Help us by\r\n";
  echo "iproviding suggestions!\r\n";
  echo "i\r\n";
  echo "iSubmitting a new gopher server suggestion is easy: use the form below to\r\n";
  echo "ienter the address of the server you'd like to add to the OGUP database.\r\n";
  echo "iThat's it! Within a couple of hours OGUP will verify and it add to the\r\n";
  echo "ilist of known servers.\r\n";
  echo "i\r\n";
  echo "7Submit a gopher server to the OGUP (format: \"gopher.example.net\")\t?\r\n";
  echo "i\r\n";
  echo "iNotes:\r\n";
  echo "i * Only suggestions for server names or IP addresses are accepted (no paths).\r\n";
  echo "i * For a gopher server to be accepted, it must present at least one link in\r\n";
  echo "i its main menu that points to a self-hosted page.\r\n";
} else {

  // process submission

  sleep(2); // just to avoid someone hammers the server with some stupid script

  echo "iYou have submitted the following input: \"{$serv}\"\r\n";
  echo "i\r\n";

  // The pattern is anchored on both ends (and uses /D so '$' does not accept
  // a trailing newline): the WHOLE input has to be made of allowed characters,
  // not merely contain one allowed character somewhere.
  $charsok = (preg_match('/^[a-z0-9][a-z0-9.:]*$/D', $serv) === 1);
  $hasdot = (strpos($serv, '.') !== FALSE);

  if ((!$charsok) || (!$hasdot)) {
    echo "3ERROR: INVALID INPUT.\r\n";
    echo "i\r\n";
    echo "iSubmissions:\r\n";
    echo "i - must not contain characters other than a-z, 0-9, '.' and ':'.\r\n";
    echo "i - must start with either an a-z or 0-9 character\r\n";
    echo "i - must contain at least one dot\r\n";
  } else {
    // one spool file per distinct submission, named after the input's md5
    $fname = $spooldir . '/' . md5($serv);
    if (file_exists($fname)) {
      echo "3ERROR: This entry has been already submitted.\r\n";
    } else {
      // create the spool directory only when missing (mkdir() warns otherwise)
      if (!is_dir($spooldir)) mkdir($spooldir);
      if (file_put_contents($fname, $serv) === FALSE) {
        echo "3ERROR: Your submission could not be stored. Please retry later.\r\n";
      } else {
        echo "iYour submission has been accepted. Thank you!\r\n";
      }
    }
  }

}

echo "i\r\n";
echo "1Go back to OGUP main screen\t../\r\n";
58 | + | |
59 | +?> |
@@ -0,0 +1,61 @@ | ||
1 | +<?php | |
2 | + | |
3 | +// | |
4 | +// db-related functions used by the PHP frontend | |
5 | +// | |
6 | +// Copyright (C) 2019-2021 Mateusz Viste | |
7 | +// | |
8 | + | |
9 | +// returns array with 5 counters: total, up, pending, down, timestamp | |
// Reads the counters file $f and returns an array with 5 entries:
//   'total', 'up', 'pending', 'down' => values read from the file
//   'timestamp'                      => modification time of the file
// Returns FALSE if the file cannot be opened or does not hold 4 values.
function db_counters($f) {
  $handle = fopen($f, "r");
  if ($handle === FALSE) return(FALSE);
  $csv = fgetcsv($handle, 64, ',');
  fclose($handle);

  // the counters file is a CSV file with 4 values: count, countactive,
  // countpending, countdown. fgetcsv() yields a non-array on an empty file
  // and a too-short array on a blank or truncated line.
  if ((!is_array($csv)) || (count($csv) < 4)) return(FALSE);

  $res = array();
  $res['total'] = $csv[0];
  $res['up'] = $csv[1];
  $res['pending'] = $csv[2];
  $res['down'] = $csv[3];
  $res['timestamp'] = stat($f)['mtime'];

  return($res);
}
25 | + | |
26 | + | |
27 | +// load the db into an array of items: | |
28 | +// ['host'] | |
29 | +// ['port'] | |
30 | +// ['failedsince'] => timestamp since failure (0=up, 1=pending) | |
31 | +// ['nextcheck'] => time when server should be checked again | |
32 | +// ['ipaddr'] => last known (working) IP | |
// Loads the db file $f into an array of items. Each item is an array with:
//   ['host']
//   ['port']
//   ['failedsince'] => timestamp since failure (0=up, 1=pending)
//   ['nextcheck']   => time when server should be checked again
//   ['ipaddr']      => last known (working) IP ('' when the line has none)
// The three $include_* flags select which classes of entries are returned.
// Returns FALSE if the file cannot be opened.
function db_load($f, $include_up, $include_pending, $include_down) {
  // open the file the caller asked for (the path used to be hard-coded)
  $fh = fopen($f, 'r');
  if (! $fh) return(FALSE);

  // line format: srv,port,downsince,nextcheck[,ipaddr]
  $res = array();
  while (($lin = fgetcsv($fh)) !== FALSE) {
    // skip blank or truncated lines instead of turning them into bogus items
    if ((!is_array($lin)) || (count($lin) < 4)) continue;

    $failedsince = intval($lin[2]);
    // include down?
    if (($failedsince > 1) && (!$include_down)) continue;
    // include pending?
    if (($failedsince == 1) && (!$include_pending)) continue;
    // include up?
    if (($failedsince == 0) && (!$include_up)) continue;

    // add to array
    $item = array();
    $item['host'] = $lin[0];
    $item['port'] = intval($lin[1]);
    $item['failedsince'] = $failedsince;
    $item['nextcheck'] = intval($lin[3]);
    $item['ipaddr'] = isset($lin[4]) ? $lin[4] : '';
    $res[] = $item;
  }
  fclose($fh);

  return($res);
}
59 | + | |
60 | + | |
61 | +?> |
@@ -0,0 +1,13 @@ | ||
1 | +<?php | |
2 | + | |
3 | +// OGUP main gophermap's stats footer | |
4 | +// Copyright (C) 2019-2021 Mateusz Viste | |
5 | + | |
6 | +include 'db.php'; | |
7 | + | |
// One-line footer for the main gophermap: date of the counters file and the
// number of servers currently counted as up.
$counters = db_counters('ogupdb.cnt');
$active = $counters['up'];
$asof = gmdate("Y-m-d", $counters['timestamp']);

echo "iAs of {$asof}, the OGUP knows about {$active} active servers.\n";
12 | + | |
13 | +?> |
@@ -0,0 +1,17 @@ | ||
1 | +# | |
2 | +# build gopherjoker | |
3 | +# | |
4 | +# Observable Gopherspace Universe Project | |
5 | +# Copyright (C) 2019-2021 Mateusz Viste | |
6 | +# | |
7 | + | |
CC = clang
CFLAGS = -g -O2 -Wall -Wextra -pedantic -std=gnu89
CFLAGS += -Weverything -Wno-disabled-macro-expansion -Wno-padded

# 'all' and 'clean' are commands, not files: keep them working even if a file
# with such a name ever appears in the directory
.PHONY: all clean

all: gopherjoker

# no recipe: make's built-in rules compile the objects and link the binary
gopherjoker: csv.o glist.o gopher.o gopherjoker.o

clean:
	rm -f gopherjoker *.o
@@ -0,0 +1,45 @@ | ||
1 | +/* | |
2 | + * CSV-reading C module | |
3 | + * | |
4 | + * Copyright (C) 2019 Mateusz Viste | |
5 | + * | |
6 | + */ | |
7 | + | |
8 | +#include "csv.h" | |
9 | + | |
10 | +/* reads a csv line from file. returns 0 on success, -1 on EOF or error. */ | |
/* reads one line from fd into buf (at most maxlinelen bytes, terminator
 * included) and splits it in place on commas. ptrs[0..maxptrs-1] are set to
 * the start of each field; fields that are missing point to an empty string.
 * when the line holds more than maxptrs fields, the last slot ends up
 * pointing to the last field. CR characters are dropped.
 * returns 0 on success, -1 on EOF, on error, on a line that does not fit in
 * buf or on unusable arguments (maxlinelen == 0 or maxptrs < 1). */
int csv_readline(char *buf, unsigned short maxlinelen, char **ptrs, int maxptrs, FILE *fd) {
  int c;
  int i;
  int t;
  int blen = 0;

  /* nothing can be stored without room for a terminator and one pointer */
  if ((maxlinelen == 0) || (maxptrs < 1)) return(-1);

  /* read the line into buf */
  for (;;) {
    c = fgetc(fd);
    if (c == EOF) {
      if (blen == 0) return(-1); /* nothing read at all: real EOF */
      break;                     /* last line has no trailing newline */
    }
    if (c == '\n') break;
    if (c == '\r') continue;
    buf[blen++] = (char)c;
    if (blen == maxlinelen) return(-1); /* no room left for the terminator */
  }
  buf[blen] = 0;

  /* set up pointers and terminate fields */
  for (i = 0; i < maxptrs; i++) ptrs[i] = buf + blen; /* preinit to empty */
  ptrs[0] = buf;
  t = 1;
  for (i = 0; buf[i] != 0; i++) {
    if (buf[i] != ',') continue;
    buf[i] = 0;
    /* t may equal maxptrs when only one slot exists: never write past it */
    if (t < maxptrs) ptrs[t] = buf + i + 1;
    if ((t + 1) < maxptrs) t++;
  }
  return(0);
}
@@ -0,0 +1,16 @@ | ||
1 | +/* | |
2 | + * CSV-reading C module | |
3 | + * | |
4 | + * Copyright (C) 2019 Mateusz Viste | |
5 | + * | |
6 | + */ | |
7 | + | |
8 | +#ifndef CSV_H | |
9 | +#define CSV_H | |
10 | + | |
11 | +#include <stdio.h> | |
12 | + | |
13 | +/* reads a csv line from file. returns 0 on success, -1 on EOF or error. */ | |
14 | +int csv_readline(char *buf, unsigned short maxlinelen, char **ptrs, int maxptrs, FILE *fd); | |
15 | + | |
16 | +#endif |
@@ -0,0 +1,143 @@ | ||
1 | +/* | |
2 | + * glist-handling routines | |
3 | + * This file is part of the Observable Gopherspace Universe Project | |
4 | + * Copyright (C) 2019-2021 Mateusz Viste | |
5 | + */ | |
6 | + | |
7 | +#include <ctype.h> /* tolower() */ | |
8 | +#include <stdlib.h> | |
9 | +#include <string.h> /* strcpy() */ | |
10 | +#include <time.h> | |
11 | + | |
12 | +#include "glist.h" | |
13 | + | |
14 | + | |
15 | +/* rotate an uint32_t value by 1 bit left */ | |
/* rotate an uint32_t value by 1 bit left: the bit that falls off the top
 * (bit 31) re-enters at the bottom (bit 0), so no input bit is ever lost. */
static void rotl(uint32_t *u) {
  uint32_t top;
  top = (*u >> 31) & 1u; /* must be sampled BEFORE the shift discards it */
  *u = (*u << 1) | top;
}
22 | + | |
23 | + | |
24 | +/* compute hash of a glist node (ie. a host, port, selector tuple) - used to | |
25 | + * determine the identity of a glist entry. this is a somewhat specialized | |
26 | + * version of a BSD sum. */ | |
27 | +uint32_t glist_node_hash(const struct gopherlist *node) { | |
28 | + uint32_t res = 0; | |
29 | + int i; | |
30 | + /* start with port */ | |
31 | + res = node->port; | |
32 | + /* add host (case-insensitive) */ | |
33 | + for (i = 0; node->fqdn[i] != 0; i++) { | |
34 | + rotl(&res); | |
35 | + res ^= (uint32_t)tolower(node->fqdn[i]); | |
36 | + } | |
37 | + /* add selector */ | |
38 | + for (i = 0; (node->selector != NULL) && (node->selector[i] != 0); i++) { | |
39 | + rotl(&res); | |
40 | + res ^= (uint32_t)node->selector[i]; | |
41 | + } | |
42 | + return(res); | |
43 | +} | |
44 | + | |
45 | + | |
46 | +void glist_free(struct gopherlist *glist) { | |
47 | + struct gopherlist *victim; | |
48 | + while (glist) { | |
49 | + free(glist->selector); | |
50 | + victim = glist; | |
51 | + glist = glist->next; | |
52 | + free(victim); | |
53 | + } | |
54 | +} | |
55 | + | |
56 | + | |
57 | +struct gopherlist *glist_findhostport(struct gopherlist *glist, const char *host, const unsigned short port) { | |
58 | + struct gopherlist *node; | |
59 | + for (node = glist; node != NULL; node = node->next) { | |
60 | + if ((node->port == port) && (strcasecmp(node->fqdn, host) == 0)) return(node); | |
61 | + } | |
62 | + return(NULL); | |
63 | +} | |
64 | + | |
65 | + | |
66 | +/* allocate a gopherlist node with host:port filled in */ | |
67 | +struct gopherlist *glist_node_alloc(const char *host, unsigned short port) { | |
68 | + struct gopherlist *node; | |
69 | + | |
70 | + /* alloc struct */ | |
71 | + node = calloc(1, sizeof(struct gopherlist) + strlen(host) + 1); | |
72 | + | |
73 | + /* fill in fields */ | |
74 | + if (node != NULL) { | |
75 | + strcpy(node->fqdn, host); | |
76 | + node->port = port; | |
77 | + } | |
78 | + | |
79 | + return(node); | |
80 | +} | |
81 | + | |
82 | + | |
83 | +/* add new host to glist, unless said host:port pair already exists there. | |
84 | + * returns pointer to the new (or already existing) struct. NULL on error. */ | |
85 | +struct gopherlist *glist_addnewhostport(struct gopherlist **glist, const char *newhost, unsigned short newport, time_t failedsince) { | |
86 | + struct gopherlist *node; | |
87 | + | |
88 | + /* is entry in list already? */ | |
89 | + node = glist_findhostport(*glist, newhost, newport); | |
90 | + if (node != NULL) return(node); | |
91 | + | |
92 | + /* create new node */ | |
93 | + node = glist_node_alloc(newhost, newport); | |
94 | + if (node == NULL) return(NULL); | |
95 | + | |
96 | + /* fill failedsince */ | |
97 | + node->failedsince = failedsince; | |
98 | + | |
99 | + /* attach the newly created node to glist */ | |
100 | + glist_node_chain(glist, node); | |
101 | + | |
102 | + return(node); | |
103 | +} | |
104 | + | |
105 | + | |
106 | +void glist_node_free(struct gopherlist **node) { | |
107 | + if (*node == NULL) return; | |
108 | + free((*node)->selector); | |
109 | + free(*node); | |
110 | + *node = NULL; | |
111 | +} | |
112 | + | |
113 | + | |
114 | +/* insert node into a glist */ | |
115 | +void glist_node_chain(struct gopherlist **glist, struct gopherlist *node) { | |
116 | + node->prev = NULL; | |
117 | + node->next = *glist; | |
118 | + if (*glist != NULL) (*glist)->prev = node; | |
119 | + *glist = node; | |
120 | +} | |
121 | + | |
122 | + | |
123 | +void glist_node_unchain(struct gopherlist **glist, struct gopherlist *node) { | |
124 | + if (node->prev == NULL) { | |
125 | + *glist = node->next; | |
126 | + if (*glist != NULL) (*glist)->prev = NULL; | |
127 | + } else { | |
128 | + node->prev->next = node->next; | |
129 | + if (node->next != NULL) node->next->prev = node->prev; | |
130 | + } | |
131 | +} | |
132 | + | |
133 | + | |
134 | +struct gopherlist *glist_node_dup(struct gopherlist *node) { | |
135 | + struct gopherlist *res; | |
136 | + if (node == NULL) return(NULL); | |
137 | + res = calloc(1, sizeof(struct gopherlist) + strlen(node->fqdn)); | |
138 | + if (res == NULL) return(NULL); | |
139 | + | |
140 | + memcpy(res, node, sizeof(struct gopherlist) + strlen(node->fqdn)); | |
141 | + if (node->selector != NULL) res->selector = strdup(node->selector); | |
142 | + return(res); | |
143 | +} |
@@ -0,0 +1,49 @@ | ||
1 | +/* | |
2 | + * glist-handling routines | |
3 | + * This file is part of the Observable Gopherspace Universe Project | |
4 | + * Copyright (C) 2019-2021 Mateusz Viste | |
5 | + */ | |
6 | + | |
#ifndef GLIST_H
#define GLIST_H

#include <stdint.h>
#include <time.h>   /* time_t - lets this header be included on its own */

/* one known gopher server (or menu entry), member of a doubly-linked list */
struct gopherlist {
  time_t failedsince;       /* set by glist_addnewhostport() from its argument */
  time_t nextcheck;
  struct gopherlist *next;
  struct gopherlist *prev;
  unsigned short port;
  char *selector;           /* heap string owned by the node, may be NULL */
  char ipaddr[64];
  char fqdn[1];             /* host name, stored past the end of the struct */
};


/* compute hash of a glist node (ie. a host, port, selector tuple) - used to
 * determine the identity of a glist entry. this is a somewhat specialized
 * version of a BSD sum. */
uint32_t glist_node_hash(const struct gopherlist *node);

/* free all nodes of a list, along with their selectors */
void glist_free(struct gopherlist *glist);

/* find the node matching host (case-insensitive) and port, NULL if none */
struct gopherlist *glist_findhostport(struct gopherlist *glist, const char *host, const unsigned short port);

/* allocate a gopherlist node with host:port filled in */
struct gopherlist *glist_node_alloc(const char *host, unsigned short port);

/* add new host to glist, unless said host:port pair already exists there.
 * returns pointer to the new (or already existing) struct. NULL on error. */
struct gopherlist *glist_addnewhostport(struct gopherlist **glist, const char *newhost, unsigned short newport, time_t failedsince);

/* free a single node and set the caller's pointer to NULL */
void glist_node_free(struct gopherlist **node);

/* insert node into a glist */
void glist_node_chain(struct gopherlist **glist, struct gopherlist *node);

/* detach node from a glist (the node itself is not freed) */
void glist_node_unchain(struct gopherlist **glist, struct gopherlist *node);

/* allocate a copy of node (selector included), NULL on error */
struct gopherlist *glist_node_dup(struct gopherlist *node);

#endif
@@ -0,0 +1,162 @@ | ||
1 | +/* | |
2 | + * gopher-related routines | |
3 | + * This file is part of the Observable Gopherspace Universe Project | |
4 | + * Copyright (C) 2019-2021 Mateusz Viste | |
5 | + */ | |
6 | + | |
7 | +#include <arpa/inet.h> /* inet_pton() */ | |
8 | +#include <errno.h> | |
9 | +#include <fcntl.h> | |
10 | +#include <netdb.h> /* gethostbyname() */ | |
11 | +#include <stdlib.h> /* atoi() */ | |
12 | +#include <string.h> /* strlen() */ | |
13 | +#include <sys/types.h> | |
14 | +#include <sys/socket.h> | |
15 | +#include <time.h> /* ctime(), time_t */ | |
16 | +#include <unistd.h> /* close() */ | |
17 | + | |
18 | + | |
19 | +#include "gopher.h" | |
20 | + | |
21 | + | |
/* connect socket s (expected to be in non-blocking mode) to addr, waiting at
 * most timeout seconds for the connection to be established.
 * returns 0 on success, non-zero otherwise (-3 if waiting on the socket was
 * not possible, -1 with errno set on timeout or connection failure). */
static int connect_nonblocking(int s, const struct sockaddr *addr, socklen_t addrlen, int timeout) {
  int r;
  int soerr = 0;
  socklen_t soerrlen = sizeof(soerr);
  struct timeval t;
  fd_set selset;

  r = connect(s, addr, addrlen);
  if (r == 0) return(0);               /* connected right away */
  if (errno != EINPROGRESS) return(r); /* immediate failure */

  /* an fd_set cannot describe descriptors outside of 0..FD_SETSIZE-1 */
  if ((s < 0) || (s >= FD_SETSIZE)) return(-3);

  /* connection is in progress: the socket turns writable once it completes
   * (successfully or not) */
  FD_ZERO(&selset);
  FD_SET(s, &selset);
  t.tv_sec = timeout;
  t.tv_usec = 0;
  r = select(s + 1, NULL, &selset, NULL, &t);
  if (r < 0) return(-3);
  if (r == 0) { /* nothing happened within the allowed time */
    errno = ETIMEDOUT;
    return(-1);
  }

  /* SO_ERROR holds the outcome of the asynchronous connect */
  if (getsockopt(s, SOL_SOCKET, SO_ERROR, &soerr, &soerrlen) != 0) return(-1);
  if (soerr != 0) {
    errno = soerr;
    return(-1);
  }
  return(0);
}
39 | + | |
40 | + | |
/* fetch a gopher resource: resolve host, connect to port over TCP (every
 * resolved address is tried in turn, 10s each), send selector followed by
 * CR/LF and read the answer into buff until the peer closes, buff is full
 * (buffsz bytes) or about 10s have elapsed.
 * ipstr (ipstrsz bytes) receives the numeric address that was connected to,
 * or an empty string.
 * returns the number of bytes stored in buff, or a negative value on error:
 *  -1 host resolution failed       -2 could not connect to any address
 *  -3 timeout while receiving      -4 socket error */
long gopher_fetch(char *buff, size_t buffsz, const char *host, unsigned short port, const char *selector, char *ipstr, size_t ipstrsz) {
  int sock = -1;
  size_t len = 0;
  int flags;
  time_t deadline;
  struct addrinfo hints;
  struct addrinfo *addr, *addrptr;

  ipstr[0] = 0;

  /* only stream (TCP) results are wanted: without hints getaddrinfo() also
   * returns datagram entries, on which connect() "succeeds" immediately */
  memset(&hints, 0, sizeof(hints));
  hints.ai_family = AF_UNSPEC;
  hints.ai_socktype = SOCK_STREAM;

  /* resolve host & connect */
  if (getaddrinfo(host, NULL, &hints, &addr) != 0) return(-1);
  for (addrptr = addr; addrptr != NULL; addrptr = addrptr->ai_next) {
    /* set port, in the field that belongs to the address family */
    if (addrptr->ai_family == AF_INET) {
      ((struct sockaddr_in *)(void *)(addrptr->ai_addr))->sin_port = htons(port);
    } else if (addrptr->ai_family == AF_INET6) {
      ((struct sockaddr_in6 *)(void *)(addrptr->ai_addr))->sin6_port = htons(port);
    } else {
      continue;
    }
    sock = socket(addrptr->ai_family, addrptr->ai_socktype, addrptr->ai_protocol);
    if (sock < 0) continue;
    /* the socket is waited on with select() further down */
    if (sock >= FD_SETSIZE) {
      close(sock);
      sock = -1;
      continue;
    }
    /* set socket as non-blocking */
    flags = fcntl(sock, F_GETFL);
    if (flags >= 0) fcntl(sock, F_SETFL, flags | O_NONBLOCK);
    /* try to connect */
    if (connect_nonblocking(sock, addrptr->ai_addr, addrptr->ai_addrlen, 10) == 0) {
      /* fill ipstr and continue */
      getnameinfo(addrptr->ai_addr, addrptr->ai_addrlen, ipstr, (socklen_t)ipstrsz, NULL, 0, NI_NUMERICHOST);
      break;
    }
    /* close sock and try next option (if any) */
    close(sock);
    sock = -1;
  }
  freeaddrinfo(addr);
  if (sock < 0) return(-2);

  /* send selector, terminated by a CR/LF pair */
  if ((selector != NULL) && (send(sock, selector, strlen(selector), MSG_MORE) < 0)) {
    close(sock);
    return(-4);
  }
  if (send(sock, "\r\n", 2, 0) < 0) {
    close(sock);
    return(-4);
  }

  /* fetch answer until end of transmission or timeout */
  deadline = time(NULL) + 10;
  for (;;) {
    size_t spaceleft = buffsz - len;
    ssize_t rlen;
    time_t now = time(NULL);
    struct timeval tv;
    fd_set rset;
    int sr;

    if (now > deadline) {
      close(sock);
      return(-3);
    }
    if (spaceleft == 0) break; /* I'm stuffed thank you */

    /* sleep until data shows up instead of spinning on EAGAIN. waking up one
     * second past the deadline makes the check above fire on the next turn */
    FD_ZERO(&rset);
    FD_SET(sock, &rset);
    tv.tv_sec = (deadline - now) + 1;
    tv.tv_usec = 0;
    sr = select(sock + 1, &rset, NULL, NULL, &tv);
    if (sr < 0) {
      if (errno == EINTR) continue;
      close(sock);
      return(-4);
    }
    if (sr == 0) continue; /* nothing to read yet: re-check the deadline */

    rlen = recv(sock, buff + len, spaceleft, 0);
    if (rlen == 0) break; /* orderly shutdown */
    if (rlen < 0) { /* sock error */
      if ((errno == EWOULDBLOCK) || (errno == EAGAIN) || (errno == EINTR)) continue;
      close(sock);
      return(-4);
    }
    len += (size_t)rlen;
  }

  /* close sock and quit */
  close(sock);
  return((long)len);
}
104 | + | |
105 | + | |
106 | +/* parse a gopher menu line entry and fill host, port and selector accordingly. | |
107 | + * returns 0 on success, non-zero otherwise. */ | |
/* copy the tab-terminated field that starts right after menu[*pos] into dst
 * (dstsz bytes), leaving *pos on the terminating tab.
 * returns 0 on success, otherwise errbase (dst too small), errbase-1 (end of
 * data reached), errbase-2 (CR met) or errbase-3 (LF met). */
static int menu_readfield(char *dst, unsigned short dstsz, const char *menu, long menulen, long *pos, int errbase) {
  unsigned short t;
  for (t = 0; ; t++) {
    if (t >= dstsz) return(errbase);
    (*pos)++;
    if (*pos >= menulen) return(errbase - 1);
    if (menu[*pos] == '\r') return(errbase - 2);
    if (menu[*pos] == '\n') return(errbase - 3);
    if (menu[*pos] == '\t') {
      dst[t] = 0;
      return(0);
    }
    dst[t] = menu[*pos];
  }
}


/* parse a gopher menu line entry and fill host, port and selector accordingly.
 * menu points to the first byte of the line (its item type character) and
 * menulen is the amount of bytes available from there. the line layout is
 * <type><description> TAB <selector> TAB <host> TAB <port>.
 * returns 0 on success, non-zero otherwise (a distinct negative code for each
 * way the line can be malformed). */
int gopher_menu_parseline(char *host, unsigned short hostsz, unsigned short *port, char *selector, unsigned short selectorsz, const char *menu, long menulen) {
  char portstr[8];
  long i = 0; /* as wide as menulen, so it cannot wrap on very long lines */
  unsigned short t;
  int r;

  /* skip item type and description, up to the first tab. the bound is checked
   * BEFORE the byte is looked at so nothing past menulen is ever read. */
  for (;;) {
    i++;
    if (i >= menulen) return(-4);
    if (menu[i] == '\t') break;
    if (menu[i] == '\r') return(-2);
    if (menu[i] == '\n') return(-3);
  }

  /* read selector (errors -5..-8) */
  r = menu_readfield(selector, selectorsz, menu, menulen, &i, -5);
  if (r != 0) return(r);

  /* read hostname (errors -9..-12) */
  r = menu_readfield(host, hostsz, menu, menulen, &i, -9);
  if (r != 0) return(r);

  /* read port: ends at a tab (extra fields follow) or at the end of line */
  for (t = 0; ; t++) {
    if (t >= sizeof(portstr)) return(-13);
    i++;
    if (i >= menulen) return(-14);
    portstr[t] = menu[i];
    if ((portstr[t] == '\t') || (portstr[t] == '\r') || (portstr[t] == '\n')) {
      int tport;
      portstr[t] = 0;
      tport = atoi(portstr);
      if ((tport < 1) || (tport > 0xffff)) return(-15);
      *port = (unsigned short)tport;
      break;
    }
  }
  return(0);
}
@@ -0,0 +1,16 @@ | ||
1 | +/* | |
2 | + * gopher-related routines | |
3 | + * This file is part of the Observable Gopherspace Universe Project | |
4 | + * Copyright (C) 2019-2021 Mateusz Viste | |
5 | + */ | |
6 | + | |
#ifndef GOPHER_H
#define GOPHER_H

#include <stddef.h> /* size_t - lets this header be included on its own */

/* fetch the resource designated by selector from host:port into buff (at most
 * buffsz bytes). ipstr (ipstrsz bytes) receives the numeric address that was
 * connected to. returns the amount of bytes fetched, negative on error. */
long gopher_fetch(char *buff, size_t buffsz, const char *host, unsigned short port, const char *selector, char *ipstr, size_t ipstrsz);

/* parse a gopher menu line entry and fill host, port and selector accordingly.
 * returns 0 on success, non-zero otherwise. */
int gopher_menu_parseline(char *host, unsigned short hostsz, unsigned short *port, char *selector, unsigned short selectorsz, const char *menu, long menulen);

#endif
@@ -0,0 +1,581 @@ | ||
1 | +/* | |
2 | + * gopherjoker, part of the Observable Gopherspace Universe Project | |
3 | + * Copyright (C) 2019-2021 Mateusz Viste | |
4 | + * | |
5 | + * 2021-12-07: write last good IP address to database for every host | |
6 | + * 2021-12-06: scheduler for probing servers + internal cache of recent checks | |
7 | + * 2021-12-04: added --saveperiod and --waitperiod cmdline parameters | |
8 | + * 2021-12-03: new hosts can be fed to gopherjoker through /tmp/ogup/ | |
9 | + * 2020-01-13: added recent history so joker won't revisit places too often | |
10 | + * 2020-01-13: variety of minor fixes and casts to shut clang warnings | |
11 | + * 2019-03-09: hosts with no menu entries are kept in db - but as IP only | |
12 | + * 2019-02-27: host is removed if its main menu has no reference to itself | |
13 | + * 2019-02-20: fixed null ptr dereference, added a 'down' count to countfile | |
14 | + * 2019-02-19: countfile contains 3 values: total, active and pending servers | |
15 | + * 2019-02-18: first public release | |
16 | + * | |
17 | + * This software is made available under the terms of the MIT License: | |
18 | + * | |
19 | + * Permission is hereby granted, free of charge, to any person obtaining a | |
20 | + * copy of this software and associated documentation files (the "Software"), | |
21 | + * to deal in the Software without restriction, including without limitation | |
22 | + * the rights to use, copy, modify, merge, publish, distribute, sublicense, | |
23 | + * and/or sell copies of the Software, and to permit persons to whom the | |
24 | + * Software is furnished to do so, subject to the following conditions: | |
25 | + * | |
26 | + * The above copyright notice and this permission notice shall be included in | |
27 | + * all copies or substantial portions of the Software. | |
28 | + * | |
29 | + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR | |
30 | + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, | |
31 | + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE | |
32 | + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER | |
33 | + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING | |
34 | + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER | |
35 | + * DEALINGS IN THE SOFTWARE. | |
36 | + */ | |
37 | + | |
38 | +#include <dirent.h> /* opendir() */ | |
39 | +#include <errno.h> | |
40 | +#include <stdio.h> /* printf(), fopen(), ... */ | |
41 | +#include <stdlib.h> /* calloc(), rand() */ | |
42 | +#include <string.h> /* strcpy() */ | |
43 | +#include <time.h> /* ctime(), time_t */ | |
44 | +#include <unistd.h> /* sleep() */ | |
45 | + | |
46 | +#include "csv.h" | |
47 | +#include "glist.h" | |
48 | +#include "gopher.h" | |
49 | + | |
50 | + | |
51 | +#define WAITPERIOD_DEFAULT 20 /* default delay between two spidering actions, in seconds */ | |
52 | +#define SAVEPERIOD_DEFAULT 3600 /* default interval between two database saves, in seconds */ | |
53 | +#define NEWHOSTSDIR "/tmp/ogup/" /* directory scanned for hand-submitted host files */ | |
54 | +#define MAXFAILTIME (3600 * 24 * 60) /* a server failing for longer than this (seconds) is removed */ | |
55 | +#define TTLINIT 64 /* initial ttl when a new host is picked; also the fetch cache size */ | |
56 | +#define CHECKPERIOD (3600 * 24) /* seconds until the next check of a host that was just picked */ | |
57 | +#define CHECKPERIOD_FAST (3600 * 2) /* seconds until the next check of a server that just went down */ | |
58 | +#define NOJOB_SLEEP 60 /* how long to sleep if there is nothing to do */ | |
59 | + | |
60 | +static int WAITPERIOD = WAITPERIOD_DEFAULT; /* overridden by --waitperiod */ | |
61 | +static int SAVEPERIOD = SAVEPERIOD_DEFAULT; /* overridden by --saveperiod */ | |
62 | + | |
63 | + | |
64 | +/**************** FUNCTIONS ****************/ | |
65 | + | |
66 | + | |
/* Format a time_t timestamp as a human-readable UTC date/time string of the
 * form "YYYY-MM-DD HH:MM".
 *
 * Returns a pointer to an internal static buffer: the content is overwritten
 * by every call, so the result must be used (or copied) before calling this
 * function again. If the timestamp cannot be converted, "?" is returned. */
static char *epoch2human(time_t timestamp) {
  static char buff[64];
  struct tm *t = gmtime(&timestamp);
  /* "%Y-%m-%d %H:%M" is the C89 spelling of the C99 shortcut "%F %R" */
  if ((t == NULL) || (strftime(buff, sizeof(buff), "%Y-%m-%d %H:%M", t) == 0)) {
    buff[0] = '?';
    buff[1] = 0;
  }
  return(buff);
}
74 | + | |
75 | + | |
76 | +/* returns pointer to dbfile name */ | |
77 | +static int parseargs(int argc, char **argv, char **dbfile, char **dbcount) { | |
78 | + int i; | |
79 | + *dbfile = NULL; | |
80 | + *dbcount = NULL; | |
81 | + for (i = 1; i < argc; i++) { | |
82 | + if (strcasecmp(argv[i], "--saveperiod") == 0) { | |
83 | + i++; | |
84 | + SAVEPERIOD = atoi(argv[i]); | |
85 | + if (SAVEPERIOD < 1) { | |
86 | + fprintf(stderr, "ERROR: --saveperiod must be a positive integer\n"); | |
87 | + return(-1); | |
88 | + } | |
89 | + } else if (strcasecmp(argv[i], "--waitperiod") == 0) { | |
90 | + i++; | |
91 | + WAITPERIOD = atoi(argv[i]); | |
92 | + if (WAITPERIOD < 1) { | |
93 | + fprintf(stderr, "ERROR: --waitperiod must be a positive integer\n"); | |
94 | + return(-1); | |
95 | + } | |
96 | + } else if ((argv[i][0] != '-') && ((*dbfile == NULL) || (*dbcount == NULL))) { | |
97 | + if (*dbfile == NULL) { | |
98 | + *dbfile = argv[i]; | |
99 | + } else { | |
100 | + *dbcount = argv[i]; | |
101 | + } | |
102 | + } else { | |
103 | + /* unknown argument */ | |
104 | + return(-1); | |
105 | + } | |
106 | + } | |
107 | + if ((*dbfile == NULL) || (*dbcount == NULL)) return(-2); | |
108 | + return(0); | |
109 | +} | |
110 | + | |
111 | + | |
112 | +static struct gopherlist *loaddb(const char *fname) { | |
113 | + FILE *fd; | |
114 | + int i; | |
115 | + char *ptrs[8]; | |
116 | + char lbuf[512]; | |
117 | + struct gopherlist *res = NULL, *newnode; | |
118 | + fd = fopen(fname, "rb"); | |
119 | + if (fd == NULL) return(NULL); | |
120 | + for (;;) { | |
121 | + i = csv_readline(lbuf, sizeof(lbuf), ptrs, 6, fd); | |
122 | + if (i != 0) break; | |
123 | + /* TSV line structure | |
124 | + * 0 hostname | |
125 | + * 1 port | |
126 | + * 2 failedsince (time_t) | |
127 | + * 3 nextcheck (time_t) | |
128 | + * 4 ipaddr (string) */ | |
129 | + newnode = glist_addnewhostport(&res, ptrs[0], (unsigned short)atoi(ptrs[1]), atol(ptrs[2])); | |
130 | + if (newnode == NULL) { | |
131 | + /* on error, free list and quit */ | |
132 | + glist_free(res); | |
133 | + return(NULL); | |
134 | + } | |
135 | + newnode->nextcheck = atoi(ptrs[3]); | |
136 | + strcpy(newnode->ipaddr, ptrs[4]); | |
137 | + } | |
138 | + fclose(fd); | |
139 | + return(res); | |
140 | +} | |
141 | + | |
142 | + | |
143 | +static unsigned long savedb(const char *fname, const char *fcount, struct gopherlist *glist) { | |
144 | + unsigned long count = 0, countactive = 0, countpending = 0, countdown = 0; | |
145 | + FILE *f; | |
146 | + struct gopherlist *node; | |
147 | + f = fopen(fname, "wb"); | |
148 | + if (f == NULL) return(0); | |
149 | + for (node = glist; node != NULL; node = node->next) { | |
150 | + count++; | |
151 | + if (node->failedsince == 0) countactive++; | |
152 | + if (node->failedsince == 1) countpending++; | |
153 | + if (node->failedsince > 1) countdown++; | |
154 | + fprintf(f, "%s,%u,%ld,%ld,%s\n", node->fqdn, node->port, node->failedsince, node->nextcheck, node->ipaddr); | |
155 | + } | |
156 | + fclose(f); | |
157 | + f = fopen(fcount, "wb"); | |
158 | + if (f == NULL) return(0); | |
159 | + fprintf(f, "%lu,%lu,%lu,%lu\n", count, countactive, countpending, countdown); | |
160 | + fclose(f); | |
161 | + return(count); | |
162 | +} | |
163 | + | |
164 | + | |
/* Open fname and read a server definition from its first line, in the form
 * "host" or "host:port". The host name is written to s (a buffer of ssz
 * bytes), with any end-of-line character and the ":port" suffix cut off.
 *
 * Returns the port number on success (70 when the line carries no port), or
 * a negative value if the file cannot be opened or read, or if the port is
 * outside the 1..65535 range. */
static int get_server_from_file(char *s, size_t ssz, const char *fname) {
  FILE *f;
  int port = 70;
  size_t i;

  /* open file */
  f = fopen(fname, "rb");
  if (f == NULL) {
    printf("ERROR: failed to open file %s (%s)\r\n", fname, strerror(errno));
    return(-1);
  }

  /* read server string */
  if (fgets(s, (int)ssz, f) == NULL) {
    printf("read failure from %s: %s\n", fname, strerror(errno));
    fclose(f);
    return(-1);
  }
  fclose(f);

  /* terminate the host name at the end of line or at the ':' separator. The
   * index is advanced (not the s pointer), so s still points at the host
   * name when it is printed below */
  for (i = 0; s[i] != 0; i++) {
    if ((s[i] == '\r') || (s[i] == '\n')) {
      s[i] = 0;
      break;
    }
    if (s[i] == ':') {
      s[i] = 0;
      port = atoi(s + i + 1);
      break;
    }
  }

  if ((port < 1) || (port > 65535)) {
    printf("invalid port: host=%s port=%d\n", s, port);
    return(-1);
  }

  return(port);
}
207 | + | |
208 | + | |
209 | +/* insert any newly-subnmitted servers to memory database (and remove the new host file) */ | |
210 | +static struct gopherlist *loadextrahosts(struct gopherlist *glist, const char *dir) { | |
211 | + char fname[128]; | |
212 | + char buff[128]; | |
213 | + DIR *d; | |
214 | + struct dirent *dptr; | |
215 | + int port; | |
216 | + | |
217 | + d = opendir(dir); | |
218 | + if (d == NULL) return(glist); | |
219 | + | |
220 | + while ((dptr = readdir(d)) != NULL) { | |
221 | + if (dptr->d_type != DT_REG) continue; /* skip anything that's not a regular file (includes symlinks) */ | |
222 | + strcpy(fname, dir); | |
223 | + strcat(fname, "/"); | |
224 | + strcat(fname, dptr->d_name); | |
225 | + printf("loading new host file: %s\n", buff); | |
226 | + port = get_server_from_file(buff, sizeof(buff), fname); | |
227 | + if (port > 0) { | |
228 | + if (glist_addnewhostport(&glist, buff, (unsigned short)port, 1) != NULL) { | |
229 | + printf("added host: %s:%d\r\n", buff, port); | |
230 | + } else { | |
231 | + puts("glist_addnewhostport() call failed"); | |
232 | + } | |
233 | + } | |
234 | + /* remove file and go to next one */ | |
235 | + unlink(fname); | |
236 | + } | |
237 | + closedir(d); | |
238 | + | |
239 | + return(glist); | |
240 | +} | |
241 | + | |
242 | + | |
243 | +/* pick a glist node that needs to be probed */ | |
244 | +static struct gopherlist *pickhostfromlist(struct gopherlist *glist) { | |
245 | + struct gopherlist *gptr, *res; | |
246 | + time_t now = time(NULL); | |
247 | + | |
248 | + if (glist == NULL) return(NULL); | |
249 | + | |
250 | + /* find the most late entry */ | |
251 | + res = glist; | |
252 | + for (gptr = glist; gptr != NULL; gptr = gptr->next) { | |
253 | + if (gptr->nextcheck < res->nextcheck) res = gptr; | |
254 | + } | |
255 | + | |
256 | + /* is it late at all? */ | |
257 | + if (res->nextcheck > now) { | |
258 | + printf("pickhostfromlist(): found no candidates to be checked (next candidate is %s:%u to be checked at %s\n", res->fqdn, res->port, epoch2human(res->nextcheck)); | |
259 | + return(NULL); | |
260 | + } | |
261 | + | |
262 | + if (res->nextcheck == 0) { | |
263 | + printf("pickhostfromlist(): %s:%u needs to be checked (never checked yet)\n", res->fqdn, res->port); | |
264 | + } else { | |
265 | + printf("pickhostfromlist(): %s:%u needs to be checked (late by %ds)\n", res->fqdn, res->port, (int)(now - res->nextcheck)); | |
266 | + } | |
267 | + return(res); | |
268 | +} | |
269 | + | |
270 | + | |
271 | +static struct gopherlist *pickrandhostfromlist(struct gopherlist *glist) { | |
272 | + struct gopherlist *gptr; | |
273 | + unsigned long cnt; | |
274 | + unsigned long i; | |
275 | + | |
276 | + if (glist == NULL) return(NULL); | |
277 | + | |
278 | + /* count how many entries I have */ | |
279 | + cnt = 0; | |
280 | + for (gptr = glist; gptr != NULL; gptr = gptr->next) cnt++; | |
281 | + | |
282 | + /* random entry id */ | |
283 | + i = (unsigned long)rand() % cnt; | |
284 | + | |
285 | + /* fast-forward to the given entry and return it */ | |
286 | + while (i--) glist = glist->next; | |
287 | + return(glist); | |
288 | +} | |
289 | + | |
290 | + | |
/* Check whether host looks like a valid host name: only ASCII letters,
 * digits, '-' and '.' are accepted, and at least 3 characters are required.
 *
 * Returns 0 if the name is acceptable, -1 if it contains a forbidden
 * character, -10 if it is shorter than 3 characters. */
static int ishostvalid(const char *host) {
  size_t i; /* size_t: a 16-bit index would wrap (and loop forever) on very long input */
  for (i = 0; host[i] != 0; i++) {
    if ((host[i] >= 'a') && (host[i] <= 'z')) continue;
    if ((host[i] >= 'A') && (host[i] <= 'Z')) continue;
    if ((host[i] >= '0') && (host[i] <= '9')) continue;
    if (host[i] == '-') continue;
    if (host[i] == '.') continue;
    return(-1);
  }
  if (i < 3) return(-10); /* host len should be at least 3 chars long */
  return(0);
}
304 | + | |
305 | + | |
306 | +/* parses a gopher menu and remembers all the hosts present in its links */ | |
307 | +static struct gopherlist *menu2gopherlist(const char *menu, long menulen) { | |
308 | + long i; | |
309 | + struct gopherlist *res = NULL; | |
310 | + struct gopherlist *node; | |
311 | + int n; | |
312 | + | |
313 | + /* DEBUG */ | |
314 | + printf("*\t*\t*\t*\t*\t*\t*\n"); | |
315 | + n = 0; | |
316 | + for (i = 0; i < menulen; i++) { | |
317 | + printf("%c", menu[i]); | |
318 | + if ((menu[i] == '\n') && (++n >= 8)) break; /* limit debug output to 8 lines */ | |
319 | + } | |
320 | + printf("*\t*\t*\t*\t*\t*\t*\n"); | |
321 | + | |
322 | + /* iterate line by line */ | |
323 | + for (i = 0; i < menulen; i++) { | |
324 | + char host[64]; | |
325 | + char selector[128]; | |
326 | + unsigned short port; | |
327 | + char type; | |
328 | + | |
329 | + /* skip to next line (unless I only started) */ | |
330 | + if (i > 0) { | |
331 | + while ((i < menulen) && (menu[i] != '\n')) i++; | |
332 | + while ((i < menulen) && (menu[i] == '\n')) i++; | |
333 | + } | |
334 | + if (i >= menulen) break; | |
335 | + | |
336 | + type = menu[i]; | |
337 | + | |
338 | + if ((type != '0') && (type != '1')) continue; | |
339 | + | |
340 | + if (gopher_menu_parseline(host, sizeof(host), &port, selector, sizeof(selector), menu + i, menulen - i) != 0) continue; | |
341 | + | |
342 | + /* validate host name */ | |
343 | + if (ishostvalid(host) != 0) continue; | |
344 | + | |
345 | + /* alloc a new node */ | |
346 | + node = glist_node_alloc(host, port); | |
347 | + if (node == NULL) { | |
348 | + glist_free(res); | |
349 | + printf("ERR: OUT OF MEMORY\n"); | |
350 | + return(NULL); | |
351 | + } | |
352 | + | |
353 | + /* attach the newly created node to glist */ | |
354 | + glist_node_chain(&res, node); | |
355 | + | |
356 | + if (menu[i] == '1') node->selector = strdup(selector); | |
357 | + } | |
358 | + | |
359 | + return(res); | |
360 | +} | |
361 | + | |
362 | + | |
363 | +/* a cached wrapper around gopher_fetch() */ | |
364 | +static long cached_gopher_fetch(char *buff, size_t buffsz, const char *host, unsigned short port, const char *selector, char *ipstr, size_t ipstrsz) { | |
365 | + long res; | |
366 | + int i; | |
367 | + char id[512]; | |
368 | + static unsigned short nextentry; | |
369 | + | |
370 | + static struct goph_cache { | |
371 | + char *id; | |
372 | + char *ipstr; | |
373 | + char *data; | |
374 | + long bytes; | |
375 | + } CACHE[TTLINIT]; | |
376 | + | |
377 | + /* compute a string that contains the host/port/selector tuple */ | |
378 | + snprintf(id, sizeof(id), "%s|%u|%s", host, port, (selector == NULL)?"":selector); | |
379 | + | |
380 | + /* scan my cache, perhaps I have the content already */ | |
381 | + for (i = 0; i < TTLINIT; i++) { | |
382 | + if (CACHE[i].id == NULL) continue; | |
383 | + if (strcasecmp(CACHE[i].id, id) != 0) continue; | |
384 | + /* found match! */ | |
385 | + printf("found id='%s' in CACHE[%d] (%ld bytes)\n", id, i, CACHE[i].bytes); | |
386 | + if (CACHE[i].bytes > 0) memcpy(buff, CACHE[i].data, (unsigned long)(CACHE[i].bytes)); | |
387 | + if (CACHE[i].ipstr) strcpy(ipstr, CACHE[i].ipstr); | |
388 | + return(CACHE[i].bytes); | |
389 | + } | |
390 | + | |
391 | + /* not found - fetch it for real */ | |
392 | + res = gopher_fetch(buff, buffsz, host, port, selector, ipstr, ipstrsz); | |
393 | + | |
394 | + /* free old fields of the cache entry */ | |
395 | + free(CACHE[nextentry].id); | |
396 | + free(CACHE[nextentry].ipstr); | |
397 | + free(CACHE[nextentry].data); | |
398 | + memset(&(CACHE[nextentry]), 0, sizeof(struct goph_cache)); | |
399 | + | |
400 | + /* write result to cache */ | |
401 | + CACHE[nextentry].id = strdup(id); | |
402 | + CACHE[nextentry].ipstr = strdup(ipstr); | |
403 | + if (res > 0) { | |
404 | + CACHE[nextentry].data = malloc((unsigned long)res); | |
405 | + memcpy(CACHE[nextentry].data, buff, (unsigned long)res); | |
406 | + } else { | |
407 | + CACHE[nextentry].data = NULL; | |
408 | + } | |
409 | + CACHE[nextentry].bytes = res; | |
410 | + | |
411 | + /* */ | |
412 | + nextentry++; | |
413 | + nextentry %= TTLINIT; | |
414 | + | |
415 | + /* return */ | |
416 | + return(res); | |
417 | +} | |
418 | + | |
419 | + | |
420 | +static void mark_server_down(struct gopherlist **glist, const char *host, unsigned short port) { | |
421 | + struct gopherlist *gnode; | |
422 | + | |
423 | + gnode = glist_findhostport(*glist, host, port); | |
424 | + | |
425 | + if (gnode == NULL) { | |
426 | + printf("INTERNAL ERROR: tried to remove a server that is not found in glist ('%s' + port %u)\n", host, port); | |
427 | + } else if (gnode->failedsince == 0) { | |
428 | + /* server was working at some point in the past, but not anymore */ | |
429 | + gnode->failedsince = time(NULL); | |
430 | + gnode->nextcheck = time(NULL) + CHECKPERIOD_FAST; | |
431 | + printf("server %s:%u went down -> flaged as failed since now (%s)\n", gnode->fqdn, gnode->port, epoch2human(gnode->failedsince)); | |
432 | + } else if (time(NULL) > gnode->failedsince + MAXFAILTIME) { | |
433 | + /* remove server from the list */ | |
434 | + printf("server removed due to long-time failure: %s:%u (failed since %s)\n", gnode->fqdn, gnode->port, epoch2human(gnode->failedsince)); | |
435 | + glist_node_unchain(glist, gnode); | |
436 | + glist_node_free(&gnode); | |
437 | + } | |
438 | +} | |
439 | + | |
440 | + | |
441 | +/**************** MAIN ****************/ | |
442 | + | |
443 | +int main(int argc, char **argv) { | |
444 | + time_t nextaction = 0; | |
445 | + time_t nextdbsave; | |
446 | + struct gopherlist *glist, *mlist, *gnode; | |
447 | + struct gopherlist *curhost = NULL; | |
448 | + char curhost_ipaddr[64]; | |
449 | + int ttl = 0; | |
450 | + char buff[0xffff]; | |
451 | + long bufflen; | |
452 | + char *dbfile, *dbfilecnt; | |
453 | + | |
454 | + if (parseargs(argc, argv, &dbfile, &dbfilecnt) != 0) { | |
455 | + printf("usage: gopherjoker [options] dbfile.csv dbcount.csv\n" | |
456 | + "\n" | |
457 | + "options:\n" | |
458 | + "--waitperiod p sets delay period between spidering actions to p seconds (default=%d)\n" | |
459 | + "--saveperiod p sets save frequency of the database file to p seconds (default=%d)\n" | |
460 | + "\n", | |
461 | + WAITPERIOD_DEFAULT, | |
462 | + SAVEPERIOD_DEFAULT | |
463 | + ); | |
464 | + return(1); | |
465 | + } | |
466 | + | |
467 | + nextdbsave = time(NULL) + SAVEPERIOD; | |
468 | + | |
469 | + /* load db file */ | |
470 | + glist = loaddb(dbfile); | |
471 | + | |
472 | + /* init random engine */ | |
473 | + srand((unsigned int)time(NULL)); | |
474 | + | |
475 | + for (;;) { | |
476 | + | |
477 | + printf("\n\n\n--- [%s] ---\n", epoch2human(time(NULL))); | |
478 | + | |
479 | + /* do not browse too fast */ | |
480 | + while (time(NULL) < nextaction) sleep(1); | |
481 | + nextaction = time(NULL) + WAITPERIOD; | |
482 | + | |
483 | + /* if extra manual hosts required to be added, do it now */ | |
484 | + glist = loadextrahosts(glist, NEWHOSTSDIR); | |
485 | + | |
486 | + /* save the db once every hour */ | |
487 | + if (time(NULL) > nextdbsave) { | |
488 | + unsigned long savedbcount; | |
489 | + printf("dumping hosts lists to %s\n", dbfile); | |
490 | + savedbcount = savedb(dbfile, dbfilecnt, glist); | |
491 | + if (savedbcount == 0) printf("ERR: savedbcount == 0 @ %d\n", __LINE__); | |
492 | + nextdbsave = time(NULL) + SAVEPERIOD; | |
493 | + } | |
494 | + | |
495 | + /* if ttl expired, pick a new host to browse */ | |
496 | + if ((--ttl == 0) || (curhost == NULL)) { | |
497 | + printf("picking a new host to process...\n"); | |
498 | + curhost = pickhostfromlist(glist); | |
499 | + if (curhost == NULL) { | |
500 | + printf("no hosts to process. you can add some through " NEWHOSTSDIR ".\n"); | |
501 | + sleep(NOJOB_SLEEP); | |
502 | + continue; | |
503 | + } | |
504 | + curhost->nextcheck = time(NULL) + CHECKPERIOD; | |
505 | + curhost = glist_node_dup(curhost); | |
506 | + ttl = TTLINIT; | |
507 | + } | |
508 | + | |
509 | + printf("[TTL=%d] fetching %s:%u/1%s ...\n", ttl, curhost->fqdn, curhost->port, (curhost->selector == NULL)?"":curhost->selector); | |
510 | + | |
511 | + /* fetch selector */ | |
512 | + bufflen = cached_gopher_fetch(buff, sizeof(buff), curhost->fqdn, curhost->port, curhost->selector, curhost_ipaddr, sizeof(curhost_ipaddr)); | |
513 | + if (bufflen < 1) { /* fail */ | |
514 | + printf("failed\n"); | |
515 | + /* if it was about root selector, see if it's time to drop the server */ | |
516 | + if (curhost->selector == NULL) mark_server_down(&glist, curhost->fqdn, curhost->port); | |
517 | + glist_node_free(&curhost); | |
518 | + continue; | |
519 | + } | |
520 | + | |
521 | + printf("ok (%ld bytes, IP=%s)\n", bufflen, curhost_ipaddr); | |
522 | + | |
523 | + /* menu to glist */ | |
524 | + mlist = menu2gopherlist(buff, bufflen); | |
525 | + if (mlist == NULL) { | |
526 | + printf("ERR: no entries found in menu\n"); | |
527 | + /* if it was about root selector, consider lack of entries as a down state */ | |
528 | + if (curhost->selector == NULL) mark_server_down(&glist, curhost->fqdn, curhost->port); | |
529 | + /* */ | |
530 | + glist_node_free(&curhost); | |
531 | + continue; | |
532 | + } | |
533 | + | |
534 | + /* try adding hosts to global glist */ | |
535 | + for (gnode = mlist; gnode != NULL; gnode = gnode->next) { | |
536 | + glist_addnewhostport(&glist, gnode->fqdn, gnode->port, 1); | |
537 | + } | |
538 | + | |
539 | + /* if main menu, then check that host is pointing to himself */ | |
540 | + if (curhost->selector == NULL) { | |
541 | + /* make sure the server points to itself */ | |
542 | + if (glist_findhostport(mlist, curhost->fqdn, curhost->port) == NULL) { | |
543 | + printf("ERR: main menu contains no link to self, dropping host:port '%s':%u\n", curhost->fqdn, curhost->port); | |
544 | + gnode = glist_findhostport(glist, curhost->fqdn, curhost->port); | |
545 | + glist_node_unchain(&glist, gnode); | |
546 | + glist_node_free(&gnode); | |
547 | + glist_node_free(&curhost); | |
548 | + glist_free(mlist); | |
549 | + continue; | |
550 | + } | |
551 | + } | |
552 | + | |
553 | + /* mark host as 'okay' and update its IP address */ | |
554 | + gnode = glist_findhostport(glist, curhost->fqdn, curhost->port); | |
555 | + gnode->failedsince = 0; | |
556 | + strcpy(gnode->ipaddr, curhost_ipaddr); | |
557 | + | |
558 | + /* curate the mlist by removing all entries with a NULL selector (these are not spider-able) */ | |
559 | + for (gnode = mlist; gnode != NULL; ) { | |
560 | + struct gopherlist *g = gnode; | |
561 | + gnode = gnode->next; | |
562 | + if (g->selector != NULL) continue; | |
563 | + /* drop the node */ | |
564 | + glist_node_unchain(&mlist, g); | |
565 | + glist_node_free(&g); | |
566 | + } | |
567 | + | |
568 | + /* choose a random entry from mlist */ | |
569 | + gnode = pickrandhostfromlist(mlist); | |
570 | + if (gnode == NULL) { | |
571 | + printf("pickrandhostfromlist() could not find a node candidate in list\n"); | |
572 | + glist_node_free(&curhost); | |
573 | + glist_free(mlist); | |
574 | + continue; | |
575 | + } | |
576 | + curhost = glist_node_dup(gnode); | |
577 | + | |
578 | + glist_free(mlist); | |
579 | + } | |
580 | + /* end of program */ | |
581 | +} |
@@ -0,0 +1,67 @@ | ||
1 | + | |
2 | +This directory contains the source files of the OGUP project. | |
3 | + | |
4 | +The web homepage of the ogup project is located at http://ogup.osdn.io | |
5 | + | |
6 | + | |
7 | +=== BUILD ==================================================================== | |
8 | + | |
9 | +Building the OGUP requires an ANSI C compiler (preferably gcc or clang) to | |
10 | +build the 'gopherjoker' crawler. | |
11 | + | |
12 | + | |
13 | +=== RUN ====================================================================== | |
14 | + | |
15 | +The 'frontend' directory can be served with a gopher server to present users | |
16 | +with the OGUP interface. This has been tested exclusively with the Motsognir | |
17 | +gopher server, and is unlikely to function properly with a different server. | |
18 | + | |
19 | +If the build process was successful, a 'gopherjoker' binary should have | |
20 | +appeared in the gopherjoker directory. This is a gopher crawler that will | |
21 | +spider the gopherspace. It should be executed as such: | |
22 | + | |
23 | + $ gopherjoker ogupdb.dat ogupdb.cnt | |
24 | + | |
25 | +ogupdb.dat is a comma-separated-value text file that gopherjoker uses as its | |
26 | +database of known gopher servers. ogupdb.cnt is a file that contains only | |
27 | +global counters of known servers, that is used by the OGUP frontend. | |
28 | + | |
29 | +The ogupdb.dat and ogupdb.cnt files should be stored in the directory where | |
30 | +OGUP's frontend lives, so the frontend always has most up-to-date data. | |
31 | + | |
32 | +Both ogupdb.dat and ogupdb.cnt are updated only once an hour by default (the | |
33 | +period can be changed with --saveperiod), to avoid too many filesystem updates. | |
34 | + | |
35 | + | |
36 | +=== LEARNING NEW SERVERS ===================================================== | |
37 | + | |
38 | +gopherjoker learns about new gopher servers by discovering menu listings of | |
39 | +existing (known) servers. It is also possible to hand-feed gopherjoker with | |
40 | +new servers, the frontend has an appropriate submission form for that. This | |
41 | +form writes submissions to the /tmp/ogup/ directory, where they are picked up | |
42 | +by gopherjoker (and deleted by gopherjoker afterwards). | |
43 | + | |
44 | + | |
45 | +=== LICENSE ================================================================== | |
46 | + | |
47 | +All OGUP files are made available under the terms of the MIT License. | |
48 | + | |
49 | +Copyright (C) 2019-2021 Mateusz Viste | |
50 | + | |
51 | +Permission is hereby granted, free of charge, to any person obtaining a copy | |
52 | +of this software and associated documentation files (the "Software"), to deal | |
53 | +in the Software without restriction, including without limitation the rights | |
54 | +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell | |
55 | +copies of the Software, and to permit persons to whom the Software is | |
56 | +furnished to do so, subject to the following conditions: | |
57 | + | |
58 | +The above copyright notice and this permission notice shall be included in all | |
59 | +copies or substantial portions of the Software. | |
60 | + | |
61 | +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR | |
62 | +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, | |
63 | +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE | |
64 | +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER | |
65 | +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, | |
66 | +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE | |
67 | +SOFTWARE. |