source code

agonistics: a language game

w a r r e n   s a c k   <wsack@media.mit.edu>

In the spirit of two exhibitions curated by Christiane Paul, CODeDOC
(September 2002) and CODeDOC II (September 2003), the source code
for the project is presented here to facilitate "a reverse look at 'software art'
projects by focusing on and comparing the 'back end' of the code that drives
the artwork's 'front end'-- the result of the code" (Paul, 2002). Unfortunately, unlike
the code in the CODeDOC shows, my code is not pretty. This is unfinished
work, likely to be in medias res for a couple of years. Consequently, it is provided
with a Creative Commons Attribution-NonCommercial-NoDerivs 2.0 License,
rather than a Free or Open Source license (like the GPL) because I hope
to improve upon, debug and enhance Agonistics before others elaborate on
or modify it. Email me suggestions please!

1: #!/usr/bin/perl
2:
3: #########################################################################
4: ##
5: ## RUN_AGONISTICS
6: ##
7: ## Warren Sack (wsack@media.mit.edu)
8: ##
9: ## February 2005
10: ##
11: ## Usage: perl run_agonistics.pl <configuration file>
12: ##
13: ## Preconditions:
13: ## (1) The configuration file needs to exist and contain
14: ## a correct set of values for all of the necessary
15: ## parameters.
16: ##
17: ## (2) The sub-directory "Resources" needs to exist within
18: ## the directory that contains this script. The Resources
19: ## sub-directory contains a number of images and CGI scripts.
20: ##
21: ## (3) If this script is going to be run by polling new messages
22: ## periodically from a server, a means to connect and
23: ## download those messages needs to be arranged before the
24: ## script is run. See the example configuration files
25: ## *.conf for details concerning, for example, the use of
26: ## a Yahoo email account as a means to poll and archive
27: ## new messages from a mailing list.
28: ##
29: #########################################################################
30: ##
31: ## Copyright (c) 2005 by Warren Sack
32: ##
33: ## This work is copyrighted with a Creative Commons
34: ## (http://creativecommons.org) Attribution-NonCommercial-NoDerivs 2.0
35: ## License.
36: ##
37: ## Here is a short summary of the license:
38: ##
39: ## You are free to copy, distribute, display, and perform the work
40: ## under the following conditions:
41: ##
42: ## Attribution: You must give the original author (Warren Sack) credit.
43: ##
44: ## Noncommercial: You may not use this work for commercial purposes.
45: ##
46: ## No Derivative Works: You may not alter, transform, or build upon this work.
47: ##
48: ## * For any reuse or distribution, you must make clear to others the
49: ## license terms of this work.
50: ## * Any of these conditions can be waived if you get permission from
51: ## the copyright holder (Warren Sack).
52: ## * Your fair use and other rights are in no way affected by the above.
53: ## * The details and full text of the license can be found at this URL:
54: ## http://creativecommons.org/licenses/by-nc-nd/2.0/legalcode
55: ##
56: #########################################################################
57:
58: ## Refuse to start unless the one required argument names an existing path.
59: die "Usage: run_agonistics.pl <configuration file>\n",
60:     "This script requires one argument.\n"
61:     if ( @ARGV == 0 );
62:
63: die "Usage: run_agonistics.pl <configuration file>\n",
64:     "This script requires a configuration file. The given configuration file cannot be found.\n"
65:     unless ( -e $ARGV[0] );
66:
67:
68: use utf8;
69: use Unicode::Normalize;
70: use HTML::Entities();
71: use File::Temp qw(tempfile);
72: use File::Path;
73: use Date::Manip qw(ParseDate ParseDateString UnixDate);
74: use Time::Local;
75: use File::Copy;
76: use MIME::WordDecoder;
77: use Lingua::Stem;
78: use Fcntl qw(:flock);
79: use Crypt::SSLeay;
80: use Mail::Client::Yahoo;
81: use Digest::MD5;
82: use Net::NNTP;
83: use Net::SSLeay;
84: use IO::Socket::SSL;
85: use Net::IMAP::Simple::SSL;
86:
87: $batch_of_messages = 0; ## 0 = no batch processed yet; initialize_global_variables does its one-time setup only while this is 0
88:
89: ## READ_CONFIG_FILE
90: ## Loads "<key> <value>" lines of the file $ARGV[0] into %agonistics_config.
91: sub read_config_file
92: {
93:     my($key,$value,$config_fh);
94:     ## Three-argument open: the file name is never parsed as a mode or a pipe.
95:     open($config_fh,'<',$ARGV[0]) || die "Can't find configuration file: $ARGV[0]\n";
96:     while(<$config_fh>) {
97:     chomp;
98:     if ( /^\s*\#/ ) { next; }   ## comment line (leading blanks allowed)
99:     if ( /^\s*$/ ) { next; }    ## blank line
100:     ($key,$value) = $_ =~ /^\s*(\S+)\s+(\S+)/ or next;   ## no "<key> <value>" pair: skip; text after the value is ignored
101:     $agonistics_config{$key} = $value;
102:     }
103:     close($config_fh);
104: }
105:
106:
107: ## INITIALIZE_GLOBAL_VARIABLES
108: ##
109: ## Input: None
110: ##
111: ## Effects: Global variables associated with this package are initialized.
112: ##
113: ## Output: None
114: ##
115: sub initialize_global_variables
116: {
117:     ## initializations done only before the first batch of messages is processed
118:     if ( $batch_of_messages == 0 ) {
119:  ## Open and read the config file into a hash.
120:     %agonistics_config = ();
121:     &read_config_file();
122:  ## Before setting slash, the machine's OS is checked.
123:     if ( $ENV{'OSTYPE'} =~ /^win/i ) { $slash = '\\'; }
124:     else { $slash = '/'; }
125:     $news_group = $agonistics_config{'Newsgroup'};
126:     $archive_file_name = $agonistics_config{'FileNameOfArchive'};
127:     $language_locale = $agonistics_config{'LanguageTag'};
128:     if ( defined($agonistics_config{'MaxFrames'}) ) {
129:         $max_frames = $agonistics_config{'MaxFrames'};
130:     }
131:     else { $max_frames = 1000; }
132:     if ( defined($agonistics_config{'PauseBetweenFrames'}) ) {
133:         $pause_between_frames = $agonistics_config{'PauseBetweenFrames'};
134:     }
135:     else { $pause_between_frames = 7; }
136:  ## Load the correct end-of-sentence tagger. English and French
137:  ## texts both use the English tagger. German texts use a
138:  ## different tagger.
139:     if ($language_locale eq 'DE') {
140:         require Lingua::DE::Sentence;
141:         Lingua::DE::Sentence->import( qw(get_sentences) );
142:     }
143:     else {
144:         require Lingua::EN::Sentence;
145:         Lingua::EN::Sentence->import( qw(get_sentences) );
146:     }
147:     $archive_name = $agonistics_config{'DirectoryForOutput'};
148:     $recency = $agonistics_config{'Recency'};
149:     $documents_url = $agonistics_config{'DocumentsURL'};
150:     $cgi_url = $agonistics_config{'CGIURL'};
151:     $web_server_directory = $agonistics_config{'WebServerDirectory'};
152:     $web_server_cgi_directory = $agonistics_config{'WebServerCGIDirectory'};
153:     $is_interactive_p = $agonistics_config{'InteractiveMode'};
154:     if ( $is_interactive_p =~ /n/i ) { $is_interactive_p = 'NO'; }
155:     else { $is_interactive_p = 'YES'; }
156:  ## Record the address to be used for posting messages to the list analyzed.
157:     $mailing_list_address = $agonistics_config{'MailingListAddress'};
158:  ## Note information about the Yahoo mail account, if it is to be used.
159:     $yahoo_uid = $agonistics_config{'YahooUID'};
160:     $yahoo_password = $agonistics_config{'YahooPassword'};
161:     $yahoo_outbox = $agonistics_config{'YahooOutbox'};
162:  ## Note information about the IMAP mail server and account, if it is to be used.
163:     $imap_server = $agonistics_config{'IMAPServer'};
164:     $imap_uid = $agonistics_config{'IMAPUID'};
165:     $imap_password = $agonistics_config{'IMAPPassword'};
166:     $imap_outbox = $agonistics_config{'IMAPOutbox'};
167:  ## Note information about the NNTP account and server, if it is to be used.
168:     $nntp_server = $agonistics_config{'NNTPServer'};
169:     $nntp_uid = $agonistics_config{'NNTPUID'};
170:     $nntp_password = $agonistics_config{'NNTPPassword'};
171:     if ( ( $nntp_server and ( $yahoo_uid or $imap_server ) )
172:          or ( $yahoo_uid and ( $nntp_server or $imap_server ) )
173:          or ( $imap_server and ( $nntp_server or $yahoo_uid ) ) ) {
174:         die "Only one of the following may be defined: (a) NNTP server; (b) Yahoo UID; (c) IMAP server".
175:         "\nTwo of the three need to be commented out in the configuration file $ARGV[0]\n";
176:     }
177:  ## How many seconds should the script wait between tries to download
178:  ## messages from the server?
179:     $pause = $agonistics_config{'PauseBetweenFetches'};
180:     $output_directory = $web_server_directory.$slash.'Agonistics'.$slash.$archive_name;
181:     $cgi_directory = $web_server_cgi_directory.$slash.'Agonistics';
182:  ## create the directory to house the CGI scripts
183:     mkdir($cgi_directory);
184:  ## copy the CGI scripts into the CGI directory
185:     my $cgi_script_file;
186:     my $send_script_file_found_p = 0;
187:     opendir CGISCRIPTS, 'Resources'.$slash.'CGIScripts' or die "Cannot open CGIScripts directory: $!";
188:     foreach $cgi_script_file (readdir CGISCRIPTS) {
189:         if ( $cgi_script_file eq 'send_message.pl' ) {
190:         &rewrite_send_script_file('Resources'.$slash.'CGIScripts'.$slash.$cgi_script_file,$cgi_directory.$slash.$cgi_script_file);
191:         $send_script_file_found_p = 1;
192:         }
193:         else { copy('Resources'.$slash.'CGIScripts'.$slash.$cgi_script_file,$cgi_directory.$slash.$cgi_script_file); }
194:         chmod(0777,$cgi_directory.$slash.$cgi_script_file);
195:     }
196:     closedir CGISCRIPTS;
197:     if ( $send_script_file_found_p == 0 ) { die "Can't find send_message.pl CGI script\n"; }
198:  ## Alternatively load the German or English+French end-of-sentence
199:  ## tagger depending upon the language specified on the command line.
200:     if ( $language_locale =~ /^DE/ ) {
201:         require Lingua::DE::Sentence;
202:         Lingua::DE::Sentence->import( qw(get_sentences) );
203:     }
204:     else {
205:         require Lingua::EN::Sentence;
206:         Lingua::EN::Sentence->import( qw(get_sentences) );
207:     }
208:     $stemmer = Lingua::Stem->new({-locale => $language_locale});
209:     $stemmer->stem_caching({ -level => 2 });
210:     $log_file = $agonistics_config{'FileNameOfLog'};
211:     open(LOG,'>'.$log_file);
212:     close(LOG);
213:     $radius_of_circle = 100000;
214:     my $random_number = int(rand(10000));
215:     $raw_messages_file = 'raw_messages_file_'.$random_number.'.txt';
216:     $end_of_message_marker = '__end_of_message_marker__';
217:  ## English stop words
218:     @english_stop_words = ("a", "about", "above", "according", "across", "actually", "adj", "after", "afterwards", "again", "against", "all", "almost", "alone", "along", "already", "also", "although", "always", "among", "amongst", "an", "and", "another", "any", "anyhow", "anyone", "anything", "anywhere", "are", "aren", "around", "as", "at", "b", "be", "became", "because", "become", "becomes", "becoming", "been", "before", "beforehand", "begin", "beginning", "behind", "being", "below", "beside", "besides", "between", "beyond", "billion", "both", "but", "by", "c", "can", "can", "cannot", "caption", "co", "could", "couldn", "d", "did", "didn", "do", "does", "doesn", "don", "down", "during", "e", "each", "eg", "eight", "eighty", "either", "else", "elsewhere", "end", "ending", "enough", "etc", "even", "ever", "every", "everyone", "everything", "everywhere", "except", "f", "few", "fifty", "first", "five", "for", "former", "formerly", "forty", "found", "four", "from", "further", "g", "h", "had", "has", "hasn", "have", "haven", "he", "hence", "her", "here", "hereafter", "hereby", "herein", "hereupon", "hers", "herself", "him", "himself", "his", "how", "however", "hundred", "i", "ie", "if", "in", "inc", "indeed", "instead", "into", "is", "isn", "it", "its", "itself", "j", "k", "l", "last", "later", "latter", "latterly", "least", "less", "let", "like", "likely", "ll", "ltd", "m", "made", "make", "makes", "many", "maybe", "me", "meantime", "meanwhile", "might", "million", "miss", "more", "moreover", "most", "mostly", "mr", "mrs", "much", "must", "my", "myself", "n", "namely", "neither", "never", "nevertheless", "next", "nine", "ninety", "no", "nobody", "none", "nonetheless", "noone", "nor", "not", "nothing", "now", "nowhere", "o", "of", "off", "often", "on", "once", "one", "only", "onto", "or", "other", "others", "otherwise", "our", "ours", "ourselves", "out", "over", "overall", "own", "p", "per", "perhaps", "q", "r", "rather", "re", "recent", "recently", "s", "same", 
"seem", "seemed", "seeming", "seems", "seven", "seventy", "several", "she", "should", "shouldn", "since", "six", "sixty", "so", "some", "somehow", "someone", "something", "sometime", "sometimes", "somewhere", "still", "stop", "such", "t", "taking", "ten", "than", "that", "the", "their", "them", "themselves", "then", "thence", "there", "thereafter", "thereby", "therefore", "therein", "thereupon", "these", "they", "thirty", "this", "those", "though", "thousand", "three", "through", "throughout", "thru", "thus", "to", "together", "too", "toward", "towards", "trillion", "twenty", "two", "u", "under", "unless", "unlike", "unlikely", "until", "up", "upon", "us", "used", "using", "v", "ve", "very", "via", "w", "was", "wasn", "we", "well", "were", "weren", "what", "whatever", "when", "whence", "whenever", "where", "whereafter", "whereas", "whereby", "wherein", "whereupon", "wherever", "whether", "which", "while", "whither", "who", "whoever", "whole", "whom", "whomever", "whose", "why", "will", "with", "within", "without", "would", "wouldn", "wrote", "x", "y", "yes", "yet", "you", "your", "yours", "yourself", "yourselves", "z", "0", "1", "2", "3", "4", "5", "6", "7", "8", "9", "http", "www", "jan", "feb", "mar", "apr", "may", "jun", "jul", "aug", "sep", "oct", "nov", "dec", "com", "edu", "org", "just", "net", "ftp", "nntp", "http", "html");
219:     @english_present_tense_of_to_be = ( "am", "are", "is" );
220:  ## French stop words
221:  ## Note that some words here are intentionally misspelled (i.e., spelled without diacritical marks).
222:  ## This appears to be necessary since the diacritical marks are not always employed by the participants
223:  ## and they are oftentimes lost between the client and server.
224:     @french_stop_words = ( "a", "au", "aux", "avec", "ce", "ces", "dans", "de", "des", "du", "elle", "elles", "en", "et", "eux", "il", "ils", "je", "la", "le", "les", "leur", "lui", "ma", "mais", "me", "m\x{EA}me", "mes", "moi", "mon", "ne", "nos", "notre", "nous", "on", "ou", "par", "pas", "plus", "pour", "qu", "que", "qui", "bien", "bon", "bonne", "bonnes", "faire", "fais", "fait", "faisons", "font", "sa", "se", "ses", "son", "sur", "ta", "te", "tes", "toi", "ton", "tu", "un", "une", "vos", "votre", "vous", "c", "d", "j", "l", "\x{E0}", "m", "n", "s", "t", "y", "\x{E9}t\x{E9}", "ete", "\x{E9}t\x{E9}e", "etee", "\x{E9}t\x{E9}es", "etees", "\x{E9}t\x{E9}s", "etes", "\x{E9}tant", "etant", "\x{E9}tante", "etante", "\x{E9}tants", "etants", "\x{E9}tantes", "etantes", "suis", "es", "est", "sommes", "\x{EA}tes", "etes", "sont", "serai", "seras", "sera", "serons", "serez", "seront", "serais", "serait", "serions", "seriez", "seraient", "\x{E9}tais", "etais", "\x{E9}tait", "etait", "\x{E9}tions", "etions", "\x{E9}tiez", "etiez", "\x{E9}taient", "etaient", "fus", "fut", "f\x{FB}mes", "fumes", "f\x{FB}tes", "futes", "furent", "sois", "soit", "soyons", "soyez", "soient", "fusse", "fusses", "f\x{FB}t", "fut", "fussions", "fussiez", "fussent", "ayant", "ayante", "ayantes", "ayants", "eu", "eue", "eues", "eus", "ai", "as", "avons", "avez", "ont", "aurai", "auras", "aura", "aurons", "aurez", "auront", "aurais", "aurait", "aurions", "auriez", "auraient", "avais", "avait", "avions", "aviez", "avaient", "eut", "e\x{FB}mes", "eumes", "e\x{FB}tes", "eutes", "eurent", "aie", "aies", "ait", "ayons", "ayez", "aient", "eusse", "eusses", "e\x{FB}t", "eut", "eussions", "eussiez", "eussent", "0", "1", "2", "3", "4", "5", "6", "7", "8", "9", "un", "deux", "trois", "quatre", "cinq", "six", "sept", "huit", "neuf", "a", "b", "c", "d", "e", "f", "g", "h", "i", "j", "k", "l", "m", "n", "o", "p", "q", "r", "s", "t", "u", "v", "w", "x", "y", "z", "http", "message", "jamais", "fr", "sans", "non", 
"oui", "dire", "dis", "dit", "disez", "disons", "disent", "faut", "toujours", "que", "quel", "quelle", "quelles", "quelqu", "quelque", "quelques", "quels", "qui", "quiconque", "quoi", "quoiqu", "quoique", "\x{E7}a", "ca", "car", "ce", "ceci", "cela", "celle", "l\x{E0}", "la", "celles", "ci", "celles", "celui", "ces", "cet", "cette", "ceux", "comme", "vrai", "\x{EA}tre", "etre", "\x{E9}crit", "ecrit", "rien", "tout", "tous", "toute", "toutes", "si", "fr", "com", "edu", "org", "autres", "autre", "sauf", "vais", "vas", "va", "venons", "venez", "vont", "aller", "aimer", "aiment", "aimons", "aime", "aimes", "mal", "news", "nntp", "html", "ftp", "net", "part", "puis", "voir", "quand", "tant", "autant", "leur", "leurs", "tien", "tienne", "tiens", "tiennes", "mien", "mienne", "miens", "miennes", "soi", "aussi", "ailleurs", "moins", "alors", "passe", "pass\x{E9}", "avoir" );
225:     @french_present_tense_of_to_be = ( "suis", "es", "est", "\x{EA}tes", "etes", "sommes", "sont" );
226:  ## German stop words
227:  ## Note that some words here are intentionally misspelled (i.e., spelled without diacritical marks).
228:  ## This appears to be necessary since the diacritical marks are not always employed by the participants
229:  ## and they are oftentimes lost between the client and server.
230:     @german_stop_words = ( "aber", "alle", "allem", "allen", "aller", "alles", "als", "also", "am", "an", "ander", "andere", "anderem", "anderen", "anderer", "anderes", "anderm", "andern", "anderr", "anders", "auch", "auf", "aus", "bei", "bin", "bis", "bist", "da", "damit", "dann", "der", "den", "des", "dem", "die", "das", "da\x{DF}", "derselbe", "derselben", "denselben", "desselben", "demselben", "dieselbe", "dieselben", "dasselbe", "dazu", "dein", "deine", "deinem", "deinen", "deiner", "deines", "denn", "derer", "dessen", "dich", "dir", "du", "dies", "diese", "diesem", "diesen", "dieser", "dieses", "doch", "dort", "durch", "ein", "eine", "einem", "einen", "einer", "eines", "einig", "einige", "einigem", "einigen", "einiger", "einiges", "einmal", "er", "ihn", "ihm", "es", "etwas", "euer", "eure", "eurem", "euren", "eurer", "eures", "f\x{FC}r", "fur", "gegen", "gewesen", "hab", "habe", "haben", "hat", "hatte", "hatten", "hier", "hin", "hinter", "ich", "mich", "mir", "ihr", "ihre", "ihrem", "ihren", "ihrer", "ihres", "euch", "im", "in", "indem", "ins", "ist", "jede", "jedem", "jeden", "jeder", "jedes", "jene", "jenem", "jenen", "jener", "jenes", "jetzt", "kann", "kein", "keine", "keinem", "keinen", "keiner", "keines", "k\x{F6}nnen", "konnen", "k\x{F6}nnte", "konntemachen", "man", "manche", "manchem", "manchen", "mancher", "manches", "mein", "meine", "meinem", "meinen", "meiner", "meines", "mit", "muss", "musste", "nach", "nicht", "nichts", "noch", "nun", "nur", "ob", "oder", "ohne", "sehr", "sein", "seine", "seinem", "seinen", "seiner", "seines", "selbst", "sich", "sie", "ihnen", "sind", "so", "solche", "solchem", "solchen", "solcher", "solches", "soll", "sollte", "sondern", "sonst", "\x{FC}ber", "uber", "um", "und", "uns", "unse", "unsem", "unsen", "unser", "unses", "unter", "viel", "vom", "von", "vor", "w\x{E4}hrend", "wahrend", "war", "waren", "warst", "was", "weg", "weil", "weiter", "welche", "welchem", "welchen", "welcher", "welches", "wenn", "werde", 
"werden", "wie", "wieder", "will", "wir", "wird", "wirst", "wo", "wollen", "wollte", "w\x{FC}rde", "wurde", "w\x{FC}rden", "wurden", "zu", "zum", "zur", "zwar", "zwischen", "dk", "com", "edu", "org", "news", "http", "ftp", "html", "com", "org", "net", "nntp");
231:     @german_present_tense_of_to_be = ( "bin", "bist", "ist", "sind", "seid" );
232:     @stop_words = ();
233:     if ( $language_locale =~ /^EN/ ) { @stop_words = @english_stop_words; }
234:  ## Many French and German discussions include English posts, so English stop words are used for them too.
235:     elsif ( $language_locale =~ /^FR/ ) { push(@stop_words,@french_stop_words,@english_stop_words); }
236:     elsif ( $language_locale =~ /^DE/ ) { push(@stop_words,@german_stop_words,@english_stop_words); }