diff options
author | Vitaly Takmazov | 2023-01-30 21:23:30 +0300 |
---|---|---|
committer | Vitaly Takmazov | 2023-01-30 23:17:41 +0300 |
commit | dd23559a978da8980675ad4089948ade9bbc323d (patch) | |
tree | 8579d591db2c8424b7dc64832fb4a5c6912dc5ec /scripts | |
parent | fbc5c1330679b12b09c6d970ee1750fa83f4cac4 (diff) |
scripts: sitemap.pl
Diffstat (limited to 'scripts')
-rw-r--r-- | scripts/sitemap.pl | 81 |
1 files changed, 81 insertions, 0 deletions
#!/usr/bin/perl

# scripts/sitemap.pl
#
# Generates the juick.com sitemap files in the current working directory:
#
#   _sitemap-messages<N>.xml  one <url> per row of the messages query
#                             (users.banned=0 AND messages.privacy=1),
#                             split into files of at most 50000 URLs
#   _sitemap-users.xml        one <url> (with <lastmod>) per non-banned user
#                             having more than 4 messages
#   _sitemap-tags.xml         one <url> per tag selected by the tags query
#   _sitemap-index.xml        <sitemapindex> that lists all of the above
#
# Every failure (database or file I/O) aborts the script with a message.

use strict;
use warnings;
use DBI;
use CGI;
use utf8;

my $XML_DECLARATION = '<?xml version="1.0" encoding="UTF-8"?>' . "\n";
my $SITEMAP_XMLNS   = 'http://www.sitemaps.org/schemas/sitemap/0.9';

# Maximum number of <url> entries written into a single messages file
# (the sitemap protocol caps one sitemap file at 50,000 URLs).
my $URLS_PER_FILE = 50000;

# Opens $path for writing, emits the XML declaration and the opening
# <$root> element, and returns the lexical file handle.
sub open_sitemap {
    my ( $path, $root ) = @_;

    open( my $fh, '>', $path )
      or die "Cannot open $path for writing: $!\n";
    print {$fh} $XML_DECLARATION;
    print {$fh} '<' . $root . ' xmlns="' . $SITEMAP_XMLNS . '">' . "\n";
    return $fh;
}

# Emits the closing </$root> element and closes the handle. The close is
# checked because buffered write errors only surface at that point.
sub close_sitemap {
    my ( $fh, $path, $root ) = @_;

    print {$fh} '</' . $root . '>' . "\n";
    close($fh) or die "Cannot close $path: $!\n";
    return;
}

# RaiseError turns every later prepare/execute/fetch failure into a die,
# so the statements below need no individual "or die".
my $dbh = DBI->connect( "dbi:Pg:dbname=juick", undef, undef,
    { RaiseError => 1, PrintError => 0 } )
  or die "Cannot connect to database: $DBI::errstr\n";

# --- messages -------------------------------------------------------------

my $messages_sth = $dbh->prepare(
"SELECT users.nick,messages.message_id FROM messages INNER JOIN users ON messages.user_id=users.id WHERE users.banned=0 AND messages.privacy=1"
);
$messages_sth->execute;

# Files are opened lazily, only when there is a row to put into them, so
# an empty <urlset> is never produced and $filescount is the number of
# files actually written (it is reused below for the index).
my $filescount    = 0;
my $urls_in_file  = 0;
my $messages_fh;
my $messages_path;

while ( my ( $nick, $mid ) = $messages_sth->fetchrow_array ) {
    if ( !defined $messages_fh ) {
        $messages_path = '_sitemap-messages' . $filescount . '.xml';
        $messages_fh   = open_sitemap( $messages_path, 'urlset' );
        $filescount++;
        $urls_in_file = 0;
    }

    # The nick is escaped the same way as in the users sitemap.
    print {$messages_fh} '<url><loc>https://juick.com/'
      . CGI::escape($nick) . '/'
      . $mid
      . '</loc></url>' . "\n";

    if ( ++$urls_in_file >= $URLS_PER_FILE ) {
        close_sitemap( $messages_fh, $messages_path, 'urlset' );
        undef $messages_fh;
    }
}
close_sitemap( $messages_fh, $messages_path, 'urlset' )
  if defined $messages_fh;

# --- users ----------------------------------------------------------------

my $users_path = '_sitemap-users.xml';
my $users_fh   = open_sitemap( $users_path, 'urlset' );

my $users_sth = $dbh->prepare(
q{SELECT nick, to_char(lastmessage, 'YYYY-MM-DD"T"HH24:MI:SS"Z"') FROM users INNER JOIN messages ON users.id=messages.user_id WHERE users.banned=0 GROUP BY users.id HAVING COUNT(message_id)>4}
);
$users_sth->execute;
while ( my ( $nick, $lastmod ) = $users_sth->fetchrow_array ) {
    my $data =
        '<url><loc>https://juick.com/'
      . CGI::escape($nick)
      . '/</loc><lastmod>'
      . $lastmod
      . '</lastmod></url>' . "\n";

    # Each user entry is also echoed to STDOUT, as the script always did.
    # NOTE(review): possibly leftover debug output - confirm before removing.
    print $data;
    print {$users_fh} $data;
}
close_sitemap( $users_fh, $users_path, 'urlset' );

# --- tags -----------------------------------------------------------------

my $tags_path = '_sitemap-tags.xml';
my $tags_fh   = open_sitemap( $tags_path, 'urlset' );

my $tags_sth = $dbh->prepare(
"SELECT tags.name FROM tags WHERE noindex=0 AND synonym_id IS NULL AND stat_messages>9 AND stat_users>2"
);
$tags_sth->execute;
while ( my ($tag) = $tags_sth->fetchrow_array ) {
    print {$tags_fh} '<url><loc>https://juick.com/tag/'
      . CGI::escape($tag)
      . '</loc></url>' . "\n";
}
close_sitemap( $tags_fh, $tags_path, 'urlset' );

$dbh->disconnect;

# --- index ----------------------------------------------------------------

my $index_path = '_sitemap-index.xml';
my $index_fh   = open_sitemap( $index_path, 'sitemapindex' );

print {$index_fh}
  '<sitemap><loc>https://juick.com/_sitemap-users.xml</loc></sitemap>' . "\n";
print {$index_fh}
  '<sitemap><loc>https://juick.com/_sitemap-tags.xml</loc></sitemap>' . "\n";
for my $next ( 0 .. $filescount - 1 ) {
    print {$index_fh} '<sitemap><loc>https://juick.com/_sitemap-messages'
      . $next
      . '.xml</loc></sitemap>' . "\n";
}
close_sitemap( $index_fh, $index_path, 'sitemapindex' );