Wikipedysta:Olafbot/OlafbotBasicTopics.java
Poniższy kod w Javie dodaje do wszystkich artykułów wymienionych w "podstawowych zagadnieniach" z jakiejś dziedziny linki do "podstawowych zagadnień".
/* */
/* Olafbot for Wikipedia */
/* */
/* Olaf Matyja 2004 */
/* */
import java.lang.*;
import java.net.*;
import java.io.*;
import java.io.Reader.*;
import java.util.*;

/**
 * Bot that reads "basic topics" index pages from pl.wikipedia.org and adds, to every
 * article linked from such an index, a "see also" link back to the index.
 *
 * <p>Page text is handled in a percent-encoded form: {@link #connect} converts every
 * non-ASCII byte of the edit box into {@code %XX}, so the text can be posted back
 * unchanged as form data.
 */
public class OlafbotBasicTopics {

    /** Directory that receives the log files. */
    final static String dirPath="d:/Olaf/Olafbot";

    /** Delay between retries of a failed request, in milliseconds. */
    private static final int RETRY_DELAY_MS = 30000;

    /** Error raised by the bot itself; {@code fatal} means the whole run must stop. */
    private static class OlafBotException extends Exception {
        boolean fatal;

        OlafBotException(String comment, boolean fatal) {
            super(comment);
            this.fatal = fatal;
        }
    }

    // Polish letters, as percent-encoded UTF-8
    final static String a="%C4%85";
    final static String c="%C4%87";
    final static String e="%C4%99";
    final static String l="%C5%82";
    final static String n="%C5%84";
    final static String o="%C3%B3";
    final static String s="%C5%9B";
    final static String x="%C5%BA";
    final static String z="%C5%BC";
    final static String A="%C4%84";
    final static String C="%C4%86";
    final static String E="%C4%98";
    final static String L="%C5%81";
    final static String N="%C5%83";
    final static String O="%C3%93";
    final static String S="%C5%9A";
    final static String X="%C5%B9";
    final static String Z="%C5%BB";
    final static String plus="%2B";
    final static String ampersand="%26";

    /** Server address; resolved in {@link #main}. */
    static InetAddress wikipedia=null;
    /** Current session cookie value; refreshed by {@link #connect} from Set-Cookie. */
    static String session="800737744ba08333e8d3802105395f51";
    /** Value of the wpEdittime form field of the most recently fetched edit form. */
    static String editTime="";

    /**
     * Replaces every non-overlapping occurrence of {@code from} in {@code src} with
     * {@code to}, scanning left to right. Inserted text is never rescanned.
     *
     * @param src  text to search
     * @param from text to look for; when empty, {@code src} is returned unchanged
     * @param to   replacement text
     * @return the text with all occurrences replaced
     */
    static String replaceAll(String src, String from, String to) {
        // An empty pattern matches everywhere and would never terminate.
        if (from.length() == 0) return src;
        int hit = src.indexOf(from);
        if (hit < 0) return src;
        StringBuffer out = new StringBuffer(src.length());
        int copied = 0;
        while (hit >= 0) {
            out.append(src.substring(copied, hit)).append(to);
            copied = hit + from.length();
            hit = src.indexOf(from, copied);
        }
        out.append(src.substring(copied));
        return out.toString();
    }

    /**
     * Decodes the four HTML entities used by the edit form.
     * {@code &amp;} is decoded last so that an escaped entity such as
     * {@code &amp;lt;} becomes the literal text {@code &lt;} and not {@code <}.
     *
     * @param text HTML text
     * @return the text with the entities replaced by their characters
     */
    static String decodeEntities(String text) {
        text = replaceAll(text, "&lt;", "<");
        text = replaceAll(text, "&gt;", ">");
        text = replaceAll(text, "&quot;", "\"");
        text = replaceAll(text, "&amp;", "&");
        return text;
    }

    /**
     * Percent-encodes every character above 127, every control character except
     * line feed, and the percent sign itself. Escapes always have at least two
     * upper-case hex digits.
     *
     * @param text text whose characters are expected to be byte values (0-255)
     * @return the encoded text
     */
    static String percentEncode(String text) {
        StringBuffer out = new StringBuffer(text.length() + 16);
        for (int i = 0; i < text.length(); i++) {
            char ch = text.charAt(i);
            if (ch > 127 || (ch < 32 && ch != 10) || ch == '%') {
                String hex = Integer.toHexString(ch).toUpperCase();
                out.append('%');
                if (hex.length() < 2) out.append('0'); // "%9" would not be a valid escape
                out.append(hex);
            } else {
                out.append(ch);
            }
        }
        return out.toString();
    }

    /**
     * Replaces characters that cannot appear in a file name with underscores.
     *
     * @param name proposed file name (without directory)
     * @return the name with {@code \ / : * ? " < > |} replaced by {@code _}
     */
    static String safeFileName(String name) {
        final String forbidden = "\\/:*?\"<>|";
        StringBuffer out = new StringBuffer(name.length());
        for (int i = 0; i < name.length(); i++) {
            char ch = name.charAt(i);
            out.append(forbidden.indexOf(ch) >= 0 ? '_' : ch);
        }
        return out.toString();
    }

    /** Sleeps for {@link #RETRY_DELAY_MS} so that a failing request is not retried in a tight loop. */
    private static void pauseBeforeRetry() {
        try {
            Thread.sleep(RETRY_DELAY_MS);
        } catch (InterruptedException ex) {
            Thread.currentThread().interrupt();
        }
    }

    /**
     * Reports a bot exception and terminates the program when it is fatal,
     * the same way {@link #processTopics} treats a fatal exception.
     */
    private static void haltOnFatal(OlafBotException ex, String pageName) {
        ex.printStackTrace();
        if (ex.fatal) {
            writeLog(pageName, "", "Error: " + ex.getMessage());
            System.exit(0);
        }
    }

    /**
     * Sends one HTTP request to the server and reads the whole response.
     *
     * <p>Side effects: updates {@link #session} when the response sets the session
     * cookie, and {@link #editTime} when the response contains a wpEdittime field.
     *
     * @param request request line plus any extra header lines, each ending in a newline
     * @param content request body; an empty string marks a page fetch
     * @return for a fetch, the percent-encoded text of the edit box; otherwise {@code null}
     * @throws OlafBotException (fatal) when the response shows the new-messages banner
     * @throws Exception on an edit conflict, a missing edit box, or an I/O failure
     */
    static String connect(String request, String content) throws Exception {
        Socket socket = null;
        try {
            socket = new Socket(wikipedia, 80);
            // ISO-8859-1 maps each byte to the char of the same value, which is what
            // percentEncode() relies on to reproduce the raw UTF-8 bytes.
            BufferedReader input = new BufferedReader(
                new InputStreamReader(socket.getInputStream(), "ISO-8859-1"));
            PrintStream output = new PrintStream(socket.getOutputStream(), true);
            // NOTE(review): Keep-Alive is requested although the response is read to
            // end of stream, so the read ends only when the server closes the connection.
            output.println(
                request +
                "Content-type: application/x-www-form-urlencoded\n" +
                "Accept: */*\n" +
                "User-Agent: OlafBot\n" +
                "Host: pl.wikipedia.org\n" +
                "Connection: Keep-Alive\n" +
                "Cache-Control: no-cache\n" +
                "Cookie: plwikiUserName=Olafbot; " +
                "plwikiSession=" + session + "; " +
                "plwikiUserID=2189; " +
                "plwikiPassword=deleted\n" +
                "\n" +
                content
            );

            StringBuffer received = new StringBuffer();
            String inputLine;
            while ((inputLine = input.readLine()) != null) {
                // NOTE(review): trim() also removes leading spaces of wikitext lines.
                received.append(inputLine.trim()).append('\n');
            }
            String text = decodeEntities(received.toString());

            final String sessStr = "Set-Cookie: plwikiSession=";
            int sess = text.indexOf(sessStr);
            if (sess >= 0) {
                int end = text.indexOf("; path=", sess);
                if (end < 0) throw new RuntimeException("Invalid cookie\n" + text);
                session = text.substring(sess + sessStr.length(), end);
            }

            // The 14 characters before the closing quote that precedes the attribute
            // are taken as the edit time.
            final String editStr = " name=\"wpEdittime\"";
            int edit = text.indexOf(editStr);
            if (edit >= 15) editTime = text.substring(edit - 15, edit - 1);

            // New-messages banner for the bot's talk page: stop the run.
            if (text.indexOf("<strong>Masz <a href=\"/wiki/Dyskusja_wikipedysty:Olafbot\"") >= 0) {
                throw new OlafBotException("Program zatrzymany.", true);
            }
            if (text.toLowerCase().indexOf("konflikt edycji") >= 0) {
                System.out.println(text);
                throw new Exception("Konflikt edycji\n" + text);
            }

            if (content.length() == 0) {
                final String openTag = "wrap=\"virtual\">";
                int start = text.indexOf(openTag);
                // Look for the closing tag only after the opening one.
                int end = start < 0 ? -1 : text.indexOf("</textarea>", start);
                if (start < 0 || end < 0) throw new Exception("Nie znaleziony tekst\n" + text);
                return percentEncode(text.substring(start + openTag.length(), end));
            }
            return null;
        } finally {
            if (socket != null) socket.close();
        }
    }

    /**
     * Fetches the wikitext of a page through its edit form, retrying until it succeeds.
     * A fatal {@code OlafBotException} ends the program instead of being retried.
     *
     * @param page page title; spaces are converted to underscores
     * @return the percent-encoded page text
     */
    static public String getPage(String page) {
        String pageName = replaceAll(page, " ", "_");
        while (true) {
            try {
                return connect(
                    "GET /w/wiki.phtml?title=" + pageName + "&action=edit&redirect=no HTTP/1.1\n",
                    ""
                );
            } catch (OlafBotException ex) {
                haltOnFatal(ex, page);
            } catch (Exception ex) {
                ex.printStackTrace();
            }
            pauseBeforeRetry();
        }
    }

    /**
     * Logs the bot in, retrying until the request succeeds.
     * A fatal {@code OlafBotException} ends the program instead of being retried.
     */
    static void login() {
        String toPost =
            "wpName=Olafbot&" +
            "wpLoginattempt=Zaloguj mnie&" +
            "wpPassword=(...)&" + // password removed from the published source
            "wpRemember=1&" +
            "\n";
        while (true) {
            try {
                System.out.println("Logowanie...");
                connect(
                    "POST /w/wiki.phtml?title=Specjalna:Userlogin&returnto=Specjalna:Userlogout HTTP/1.1\n" +
                    "Content-Length: " + String.valueOf(toPost.length()) + "\n",
                    toPost
                );
                return;
            } catch (OlafBotException ex) {
                haltOnFatal(ex, "");
            } catch (Exception ex) {
                ex.printStackTrace();
            }
            pauseBeforeRetry();
        }
    }

    /**
     * Appends one tab-separated line to the result log {@code podstawowe.txt}.
     * Terminates the program when the log cannot be written.
     */
    static void writeLog(String pageName, String topicsName, String text) {
        PrintWriter fw = null;
        try {
            fw = new PrintWriter(new FileWriter(dirPath + "/podstawowe.txt", true));
            fw.println(pageName + "\t" + topicsName + "\t" + text);
        } catch (Throwable ex2) {
            ex2.printStackTrace();
            System.exit(0);
        } finally {
            if (fw != null) fw.close();
        }
    }

    /**
     * Appends the stack trace of {@code ex} to a per-page log file and prints it.
     * Terminates the program when the log cannot be written.
     */
    static void writeLog(String pageName, String topicsName, Throwable ex) {
        PrintWriter fw = null;
        try {
            // Page titles may contain characters that are not allowed in file names.
            fw = new PrintWriter(new FileWriter(
                dirPath + "/" + safeFileName("Log " + pageName + ".txt"), true));
            fw.println(pageName);
            fw.println(topicsName);
            ex.printStackTrace();
            ex.printStackTrace(fw);
        } catch (Throwable ex2) {
            ex2.printStackTrace();
            System.exit(0);
        } finally {
            if (fw != null) fw.close();
        }
    }

    /**
     * Decides what to do with a page and, when a link has to be added, builds the new text.
     *
     * @param content    percent-encoded page text, already trimmed
     * @param topicsName title of the index page to link to
     * @return a two-element array: the status message, and the new page text or
     *         {@code null} when the page must be left alone
     */
    static String[] planEdit(String content, String topicsName) {
        if (content.length() == 0) return new String[] {"Strona nie istnieje", null};

        String lower = content.toLowerCase();
        if (lower.indexOf("redirect") >= 0) return new String[] {"Jest redirect", null};

        String topicLower = topicsName.toLowerCase();
        String topicUnderscored = replaceAll(topicLower, " ", "_");
        if (lower.indexOf(topicLower) >= 0 || lower.indexOf(topicUnderscored) >= 0) {
            return new String[] {"Jest link", null};
        }
        if (lower.indexOf("ujednoznaczniaj") >= 0 || lower.indexOf("msg:disamb") >= 0) {
            return new String[] {"Strona ujednoznaczniajaca", null};
        }

        int zobacz = lower.indexOf("zobacz");
        if (zobacz >= 2 && content.charAt(zobacz - 2) == '(') {
            return new String[] {"Dziwne zobacz", null};
        }
        if (zobacz < 0) {
            // No "see also" text at all: append a new one at the end.
            return new String[] {
                "Dodawana sekcja",
                content + "\n\nZobacz te" + z + ": [[" + topicLower + "]]\n"
            };
        }
        if (lower.indexOf("zobacz", zobacz + 1) >= 0) {
            return new String[] {"Dwukrotne zobacz", null};
        }

        // Insert in front of the first link after "zobacz", or, when there is none,
        // at the start of the following line.
        int k = content.indexOf("[[", zobacz);
        if (k < 0) {
            k = content.indexOf("\n", zobacz);
            if (k < 0) k = content.length();
            else while (k < content.length() && content.charAt(k) < ' ') k++;
        }

        int gwiazdka = content.substring(zobacz, k).indexOf("*");
        String separator;
        if (gwiazdka >= 0) {
            // gwiazdka is relative to zobacz; convert it before indexing into content.
            int afterStar = zobacz + gwiazdka + 1;
            boolean spaced = afterStar < content.length() && content.charAt(afterStar) == ' ';
            separator = "]]\n*" + (spaced ? " " : "");
        } else {
            separator = "]], ";
        }
        String updated = content.substring(0, k) + "[[" + topicLower + separator +
            (k < content.length() ? content.substring(k) : "");
        return new String[] {
            gwiazdka >= 0 ? "Dodawany link po gwiazdce" : "Dodawany link w linii",
            updated
        };
    }

    /**
     * Reads one article and, unless it has to be skipped, saves it with a link to the
     * index page added. Waits 5 seconds before reading and 15 seconds before saving.
     *
     * @param pageName   article title
     * @param topicsName index page title
     * @return status message describing what was done
     * @throws Exception when saving fails (see {@link #connect})
     */
    static String processPage(String pageName, String topicsName) throws Exception {
        Thread.sleep(5000);
        System.out.println("Czytanie " + pageName);
        String content = getPage(pageName).trim();

        String[] plan = planEdit(content, topicsName);
        String result = plan[0];
        System.out.println(result);
        if (plan[1] == null) return result;

        content = plan[1];
        content = replaceAll(content, "[[mat.]]", "");
        content = replaceAll(content, "&", "%26");
        content = replaceAll(content, "+", "%2B");

        Thread.sleep(15000);
        System.out.println("Zapisywanie " + pageName);
        pageName = replaceAll(pageName, " ", "_");
        String content3 =
            "wpSummary=Dodanie linku do spisu [[" + topicsName.toLowerCase() + "]]&" +
            "wpMinoredit=1&" +
            "wpWatchthis=0&" +
            "wpEdittime=" + editTime + "&" +
            "wpSection=&" +
            "wpTextbox1=" + content +
            "\n";
        connect(
            "POST /w/wiki.phtml?title=" + pageName + "&action=submit&redirect=no HTTP/1.1\n" +
            "Content-Type: application/x-www-form-urlencoded\n" +
            "Content-Length: " + String.valueOf(content3.length()) + "\n",
            content3
        );
        System.out.println("OK");
        return result;
    }

    /**
     * Pages are processed only while this flag is true. It is never set in this
     * file, so as published the bot enumerates links without editing anything.
     * NOTE(review): appears to be a manual resume switch - confirm before enabling.
     */
    static boolean started=false;

    /**
     * Tells whether a link target taken from an index page is an article to process.
     * Empty targets, other index pages and glossaries, "Wikipedia", and targets with
     * a namespace prefix or a section anchor are rejected.
     */
    static boolean isProcessablePageName(String pageName) {
        return pageName.length() > 0 &&
            pageName.indexOf("odstawowe") < 0 &&
            pageName.indexOf("losariusz") < 0 &&
            !pageName.equals("Wikipedia") &&
            pageName.indexOf(":") < 0 &&
            pageName.indexOf("#") < 0;
    }

    /**
     * Reads an index page and processes every distinct article linked from it.
     * A failed page is retried after 30 seconds; a fatal bot exception ends the program.
     *
     * @param topicsName title of the index page
     */
    static void processTopics(String topicsName) {
        System.out.println("Czytanie " + topicsName);
        String topics = getPage(topicsName);
        System.out.println("Odczytane");
        int start = 0;
        Properties pages = new Properties(); // used as a set of titles already seen
        while (true) {
            int open = topics.indexOf("[[", start);
            if (open < 0) break;
            int bar = topics.indexOf("|", open);
            int close = topics.indexOf("]]", open);
            if (close < 0) break; // unterminated link: nothing more to extract
            int end = (bar >= 0 && bar < close) ? bar : close;
            String pageName = topics.substring(open + 2, end).trim();
            if (isProcessablePageName(pageName) && pages.getProperty(pageName) == null) {
                while (true) {
                    try {
                        if (started) writeLog(pageName, topicsName, processPage(pageName, topicsName));
                        break;
                    } catch (OlafBotException ex) {
                        writeLog(pageName, topicsName, "Error: " + ex.getMessage());
                        writeLog(pageName, topicsName, ex);
                        if (ex.fatal) System.exit(0);
                        break;
                    } catch (Throwable ex) {
                        if (ex instanceof SocketException) ex.printStackTrace();
                        else writeLog(pageName, topicsName, ex);
                        try {
                            Thread.sleep(30000);
                        } catch (Exception ex2) {
                            writeLog(pageName, topicsName, ex2);
                        }
                    }
                }
                pages.put(pageName, "");
            }
            start = open + 2;
        }
    }

    /** Resolves the server, logs in and processes each index page in turn. */
    public static void main(String[] args) throws Exception {
        wikipedia = InetAddress.getByName("pl.wikipedia.org");
        login();
        writeLog("", "", "Wystartowany");
        processTopics("Podstawowe zagadnienia z zakresu matematyki");
        //processTopics("Podstawowe zagadnienia z zakresu muzyki");
        processTopics("Podstawowe zagadnienia z zakresu biologii");
        processTopics("Podstawowe zagadnienia z zakresu architektury");
        processTopics("Podstawowe zagadnienia z zakresu astronomii");
        processTopics("Podstawowe zagadnienia z zakresu geografii");
        processTopics("Podstawowe zagadnienia z zakresu ekologii");
        processTopics("Podstawowe zagadnienia z zakresu psychologii");
        processTopics("Podstawowe zagadnienia z zakresu filozofii");
    }
}