Nothing Special   »   [go: up one dir, main page]

Wikipedysta:Olafbot/OlafbotBasicTopics.java

Poniższy kod w Javie dodaje do wszystkich artykułów wymienionych w "podstawowych zagadnieniach" z jakiejś dziedziny linki do "podstawowych zagadnień".


/*                                         */
/*          Olafbot for Wikipedia          */
/*                                         */
/*             Olaf Matyja 2004            */
/*                                         */

import java.lang.*;
import java.net.*;
import java.io.*;
import java.io.Reader.*;
import java.util.*;

/**
 * Wikipedia bot that walks "basic topics" index pages and adds, to every
 * article linked from such an index, a link back to the index page.
 *
 * <p>The bot talks to pl.wikipedia.org over a raw socket, keeps the session
 * cookie and the last seen edit time in static fields, and appends one line
 * per processed article to a log file under {@link #dirPath}.
 */
public class OlafbotBasicTopics {
    /** Directory that receives the result log and the per-page error logs. */
    final static String dirPath="d:/Olaf/Olafbot";

    /** Pause before an article is read, in milliseconds. */
    private static final int READ_DELAY_MS=5000;
    /** Pause between reading an article and saving it, in milliseconds. */
    private static final int WRITE_DELAY_MS=15000;
    /** Pause before a failed article is processed again, in milliseconds. */
    private static final int ERROR_DELAY_MS=30000;
    /** Pause between repeated attempts of a single GET/login request, in milliseconds. */
    private static final int RETRY_DELAY_MS=5000;

    /** Characters replaced by '_' when a page title is used in a log file name. */
    private static final String UNSAFE_FILE_CHARS="\\/:*?\"<>|";

    /**
     * Error raised by the bot itself; {@link #fatal} tells the caller whether
     * the whole run has to stop.
     */
    private static class OlafBotException extends Exception {
            final boolean fatal;

            OlafBotException(String comment,boolean fatal) {
                    super(comment);
                    this.fatal=fatal;
            }
    }

    // Percent-encoded (UTF-8) Polish letters; x/X stand for z-acute, z/Z for z-dot.
    final static String a="%C4%85";
    final static String c="%C4%87";
    final static String e="%C4%99";
    final static String l="%C5%82";
    final static String n="%C5%84";
    final static String o="%C3%B3";
    final static String s="%C5%9B";
    final static String x="%C5%BA";
    final static String z="%C5%BC";
    final static String A="%C4%84";
    final static String C="%C4%86";
    final static String E="%C4%98";
    final static String L="%C5%81";
    final static String N="%C5%83";
    final static String O="%C3%93";
    final static String S="%C5%9A";
    final static String X="%C5%B9";
    final static String Z="%C5%BB";
    final static String plus="%2B";
    final static String ampersand="%26";

    /** Server address, resolved once in {@link #main}. */
    static InetAddress wikipedia=null;
    /** Current session cookie value; refreshed from every response that sets it. */
    static String session="800737744ba08333e8d3802105395f51";
    /** Value of the wpEdittime form field from the most recent edit form. */
    static String editTime="";

    /**
     * Replaces every non-overlapping occurrence of {@code from} in {@code src}
     * with {@code to}, scanning left to right. Inserted text is never rescanned.
     *
     * @param src  text to search
     * @param from text to look for; when empty, {@code src} is returned unchanged
     * @param to   replacement text
     * @return the text with all occurrences replaced ({@code src} itself when nothing matched)
     */
    static String replaceAll(String src,String from,String to) {
        // An empty pattern matches at every position and would never terminate.
        if (from.length()==0)
            return src;
        int hit=src.indexOf(from);
        if (hit<0)
            return src;
        StringBuilder out=new StringBuilder(src.length()+to.length());
        int copied=0;
        while (hit>=0) {
            out.append(src,copied,hit).append(to);
            copied=hit+from.length();
            hit=src.indexOf(from,copied);
        }
        out.append(src,copied,src.length());
        return out.toString();
    }

    /** Turns the four HTML entities used by the edit form back into plain characters. */
    private static String decodeEntities(String html) {
        html=replaceAll(html,"&lt;","<");
        html=replaceAll(html,"&gt;",">");
        html=replaceAll(html,"&quot;","\"");
        // "&amp;" goes last so that e.g. "&amp;lt;" is not decoded twice.
        return replaceAll(html,"&amp;","&");
    }

    /**
     * Percent-encodes every character above 127, every control character
     * except line feed, and the percent sign itself, always with at least two
     * hex digits.
     */
    private static String percentEncode(String raw) {
        StringBuilder out=new StringBuilder(raw.length());
        for (int i=0;i<raw.length();i++) {
            char ch=raw.charAt(i);
            if (ch>127 || (ch<32 && ch!=10) || ch=='%') {
                out.append('%');
                if (ch<16)
                    out.append('0');
                out.append(Integer.toHexString(ch).toUpperCase());
            } else {
                out.append(ch);
            }
        }
        return out.toString();
    }

    /**
     * Sends one HTTP request to the server and reads the complete response.
     *
     * <p>Side effects: updates {@link #session} when the response sets the
     * session cookie and {@link #editTime} when the response contains the
     * wpEdittime form field.
     *
     * @param request request line plus any extra header lines, each ending in "\n"
     * @param content request body; an empty string marks a read request
     * @return for an empty {@code content}, the percent-encoded text of the
     *         edit box found in the response; otherwise {@code null}
     * @throws Exception when the bot has to stop (new-message banner found),
     *         when an edit conflict is reported, when no edit box is found in
     *         the response to a read request, or on any I/O failure
     */
    static String connect(String request,String content) throws Exception {
        Socket socket=null;
        try {
            socket=new Socket(wikipedia,80);
            // ISO-8859-1 maps each received byte to one char, which is what
            // percentEncode() relies on to emit one %XX escape per byte.
            BufferedReader input=new BufferedReader(
                new InputStreamReader(socket.getInputStream(),"ISO-8859-1"));
            PrintStream output=new PrintStream(socket.getOutputStream(),true);

            // NOTE(review): the response is read until end of stream while the
            // request asks for Keep-Alive; confirm the server closes the connection.
            output.println(
                request+
                "Content-type: application/x-www-form-urlencoded\n"+
                "Accept: */*\n"+
                "User-Agent: OlafBot\n"+
                "Host: pl.wikipedia.org\n"+
                "Connection: Keep-Alive\n"+
                "Cache-Control: no-cache\n"+
                "Cookie: plwikiUserName=Olafbot; "+
                        "plwikiSession="+session+"; "+
                        "plwikiUserID=2189; "+
                        "plwikiPassword=deleted\n"+
                "\n"+
                content
            );

            StringBuilder received=new StringBuilder();
            String inputLine;
            while ((inputLine=input.readLine())!=null)
                received.append(inputLine.trim()).append('\n');
            String text=decodeEntities(received.toString());

            final String sessStr="Set-Cookie: plwikiSession=";
            int sess=text.indexOf(sessStr);
            if (sess>=0) {
                int end=text.indexOf("; path=",sess);
                if (end<0)
                    throw new RuntimeException("Invalid cookie\n"+text);
                session=text.substring(sess+sessStr.length(),end);
            }

            // The 14 characters before the closing quote that precedes the
            // attribute are taken as the edit time.
            final String editStr=" name=\"wpEdittime\"";
            int edit=text.indexOf(editStr);
            if (edit>=0)
                editTime=text.substring(edit-15,edit-1);

            // A new-message banner for the bot account stops the whole run.
            if (text.indexOf("<strong>Masz <a href=\"/wiki/Dyskusja_wikipedysty:Olafbot\"")>=0)
                throw new OlafBotException("Program zatrzymany.",true);
            if (text.toLowerCase().indexOf("konflikt edycji")>=0) {
                System.out.println(text);
                throw new Exception("Konflikt edycji\n"+text);
            }
            if (content.length()!=0)
                return null;

            final String boxStart="wrap=\"virtual\">";
            int start=text.indexOf(boxStart);
            // Look for the closing tag only after the opening marker.
            int end=start<0 ? -1 : text.indexOf("</textarea>",start);
            if (start<0 || end<0)
                throw new Exception("Nie znaleziony tekst\n"+text);
            return percentEncode(text.substring(start+boxStart.length(),end));
        } finally {
            if (socket!=null)
                socket.close();
        }
    }

    /** Waits before a request is repeated so that failures do not hammer the server. */
    private static void pauseBeforeRetry() {
        try {
            Thread.sleep(RETRY_DELAY_MS);
        } catch (InterruptedException ex) {
            Thread.currentThread().interrupt();
        }
    }

    /** Logs a bot error for the given page and terminates the program when it is fatal. */
    private static void reportBotError(String pageName,String topicsName,OlafBotException ex) {
        writeLog(pageName,topicsName,"Error: "+ex.getMessage());
        writeLog(pageName,topicsName,ex);
        if (ex.fatal)
            System.exit(0);
    }

    /**
     * Fetches the wiki text of a page through its edit form.
     *
     * <p>Failed requests are repeated, with a pause in between, until one
     * succeeds. A fatal bot error is logged and ends the program instead of
     * being retried.
     *
     * @param page page title; spaces are turned into underscores for the URL
     * @return the percent-encoded page text as produced by {@link #connect}
     */
    static public String getPage(String page) {
        String pageName=replaceAll(page," ","_");
        while (true) {
            try {
                return connect(
                        "GET /w/wiki.phtml?title="+pageName+"&action=edit&redirect=no HTTP/1.1\n",
                        ""
                );
            } catch (OlafBotException ex) {
                if (ex.fatal)
                    reportBotError(page,"",ex);
                else
                    ex.printStackTrace();
            } catch (Exception ex) {
                ex.printStackTrace();
            }
            pauseBeforeRetry();
        }
    }

    /**
     * Logs the bot account in, repeating the request (with a pause) until it
     * goes through. A fatal bot error is logged and ends the program.
     */
    static void login() {
        String toPost=
                "wpName=Olafbot&"+
                "wpLoginattempt=Zaloguj mnie&"+
                "wpPassword=(...)&"+ // password removed from the published source
                "wpRemember=1&"+
                "\n";

        while (true) {
            try {
                System.out.println("Logowanie...");
                connect(
                        "POST /w/wiki.phtml?title=Specjalna:Userlogin&returnto=Specjalna:Userlogout HTTP/1.1\n"+
                        "Content-Length: "+String.valueOf(toPost.length())+"\n",
                        toPost
                );
                return;
            } catch (OlafBotException ex) {
                if (ex.fatal)
                    reportBotError("","",ex);
                else
                    ex.printStackTrace();
            } catch (Exception ex) {
                ex.printStackTrace();
            }
            pauseBeforeRetry();
        }
    }

    /** Replaces characters that cannot appear in a file name with underscores. */
    private static String safeFileName(String name) {
        StringBuilder out=new StringBuilder(name.length());
        for (int i=0;i<name.length();i++) {
            char ch=name.charAt(i);
            out.append(ch<32 || UNSAFE_FILE_CHARS.indexOf(ch)>=0 ? '_' : ch);
        }
        return out.toString();
    }

    /**
     * Appends one tab-separated result line to "podstawowe.txt" in
     * {@link #dirPath}. The program exits when the log cannot be written.
     *
     * @param pageName   processed article
     * @param topicsName index page the article was found on
     * @param text       outcome to record
     */
    static void writeLog(String pageName,String topicsName,String text) {
        try {
                PrintWriter fw=new PrintWriter(new FileWriter(dirPath+"/podstawowe.txt",true));
                try {
                        fw.println(pageName+"\t"+topicsName+"\t"+text);
                } finally {
                        fw.close();
                }
        } catch (Throwable ex2) {
                ex2.printStackTrace();
                System.exit(0);
        }
    }

    /**
     * Prints the stack trace of {@code ex} to the console and appends it,
     * together with both names, to a per-page file "Log &lt;page&gt;.txt" in
     * {@link #dirPath}. The program exits when the log cannot be written.
     *
     * @param pageName   processed article; characters not allowed in file
     *                   names are replaced by '_' in the file name only
     * @param topicsName index page the article was found on
     * @param ex         error to record
     */
    static void writeLog(String pageName,String topicsName,Throwable ex) {
        try {
                PrintWriter fw=new PrintWriter(
                        new FileWriter(dirPath+"/Log "+safeFileName(pageName)+".txt",true));
                try {
                        fw.println(pageName);
                        fw.println(topicsName);
                        ex.printStackTrace();
                        ex.printStackTrace(fw);
                } finally {
                        fw.close();
                }
        } catch (Throwable ex2) {
                ex2.printStackTrace();
                System.exit(0);
        }
    }

    /** Prints an outcome to the console and hands it back to the caller. */
    private static String report(String outcome) {
        System.out.println(outcome);
        return outcome;
    }

    /**
     * Reads one article and, unless it is missing, a redirect, a
     * disambiguation page, already linked, or has an unexpected "zobacz"
     * layout, adds a link to the index page and saves the article.
     *
     * @param pageName   article to process
     * @param topicsName index page to link to (the link uses its lower-cased name)
     * @return a short Polish description of what was done or why nothing was done
     * @throws Exception when saving fails or the thread is interrupted while waiting
     */
    static String processPage(String pageName,String topicsName) throws Exception {
        Thread.sleep(READ_DELAY_MS);
        System.out.println("Czytanie "+pageName);
        String content=getPage(pageName).trim();
        if (content.length()==0)
            return report("Strona nie istnieje");

        String lowered=content.toLowerCase();
        String link=topicsName.toLowerCase();
        if (lowered.indexOf("redirect")>=0)
            return report("Jest redirect");
        if (lowered.indexOf(link)>=0 || lowered.indexOf(replaceAll(link," ","_"))>=0)
            return report("Jest link");
        if (lowered.indexOf("ujednoznaczniaj")>=0 || lowered.indexOf("msg:disamb")>=0)
            return report("Strona ujednoznaczniajaca");

        int zobacz=lowered.indexOf("zobacz");
        // "zobacz" preceded by an opening parenthesis two characters earlier is left alone.
        if (zobacz>=2 && content.charAt(zobacz-2)=='(')
            return report("Dziwne zobacz");

        String result;
        if (zobacz<0) {
            result=report("Dodawana sekcja");
            content+="\n\nZobacz te"+z+": [["+link+"]]\n";
        } else {
            if (lowered.indexOf("zobacz",zobacz+1)>=0)
                return report("Dwukrotne zobacz");

            // Insertion point: the first link after "zobacz", or else the
            // first printable character after the end of that line.
            int k=content.indexOf("[[",zobacz);
            if (k<0) {
                k=content.indexOf("\n",zobacz);
                if (k<0)
                    k=content.length();
                else while (k<content.length() && content.charAt(k)<' ')
                    k++;
            }
            // An asterisk between "zobacz" and the insertion point means a bulleted list.
            int gwiazdka=content.substring(zobacz,k).indexOf("*");
            String separator;
            if (gwiazdka>=0) {
                result=report("Dodawany link po gwiazdce");
                // gwiazdka is relative to zobacz; mirror the spacing that follows the asterisk.
                int afterStar=zobacz+gwiazdka+1;
                boolean spaced=afterStar<content.length() && content.charAt(afterStar)==' ';
                separator="]]\n*"+(spaced ? " " : "");
            } else {
                result=report("Dodawany link w linii");
                separator="]], ";
            }
            content=content.substring(0,k)+"[["+link+separator+content.substring(k);
        }

        content=replaceAll(content,"[[mat.]]","");
        // Escape the characters that have a meaning inside the form body.
        content=replaceAll(content,"&",ampersand);
        content=replaceAll(content,"+",plus);

        Thread.sleep(WRITE_DELAY_MS);
        System.out.println("Zapisywanie "+pageName);
        pageName=replaceAll(pageName," ","_");
        String content3=
            "wpSummary=Dodanie linku do spisu [["+link+"]]&"+
            "wpMinoredit=1&"+
            "wpWatchthis=0&"+
            "wpEdittime="+editTime+"&"+
            "wpSection=&"+
            "wpTextbox1="+content+
            "\n";
        connect(
            "POST /w/wiki.phtml?title="+pageName+"&action=submit&redirect=no HTTP/1.1\n"+
            "Content-Type: application/x-www-form-urlencoded\n"+
            "Content-Length: "+String.valueOf(content3.length())+"\n",
            content3
        );

        System.out.println("OK");
        return result;
    }

    /** Gate for processing; currently switched on for the first eligible article. */
    static boolean started=false;

    /**
     * Reads an index page and runs {@link #processPage} once for every
     * distinct article it links to. Links to other index pages, glossaries,
     * "Wikipedia", namespaced pages and section anchors are skipped.
     *
     * <p>An article whose processing fails with anything other than a bot
     * error is tried again after a pause, without limit.
     *
     * @param topicsName title of the index page
     */
    static void processTopics(String topicsName) {
        System.out.println("Czytanie "+topicsName);
        String topics=getPage(topicsName);
        System.out.println("Odczytane");
        Set<String> handled=new HashSet<String>();
        int start=0;
        while (true) {
            int open=topics.indexOf("[[",start);
            if (open<0)
                break;
            int close=topics.indexOf("]]",open);
            if (close<0)
                break; // unterminated link: nothing more to extract
            int pipe=topics.indexOf("|",open);
            int k=pipe<close && pipe>=0 ? pipe : close;
            String pageName=topics.substring(open+2,k).trim();
            start=open+2;

            boolean eligible=
                pageName.length()>0 &&
                pageName.indexOf("odstawowe")<0 && pageName.indexOf("losariusz")<0 &&
                !pageName.equals("Wikipedia") &&
                pageName.indexOf(":")<0 && pageName.indexOf("#")<0 &&
                !handled.contains(pageName);
            if (!eligible)
                continue;

            while (true) {
                try {
                    started=true;
                    if (started)
                        writeLog(pageName,topicsName,processPage(pageName,topicsName));
                    break;
                }
                catch(OlafBotException ex) {
                    reportBotError(pageName,topicsName,ex);
                    break;
                }
                catch(Throwable ex) {
                    if (ex instanceof SocketException)
                        ex.printStackTrace();
                    else
                        writeLog(pageName,topicsName,ex);
                    try {
                        Thread.sleep(ERROR_DELAY_MS);
                    } catch (Exception ex2) {
                        writeLog(pageName,topicsName,ex2);
                    }
                }
            }
            handled.add(pageName);
        }
    }

    /**
     * Resolves the server, logs in and processes the configured index pages
     * one after another.
     */
    public static void main(String[] args) throws Exception {
        wikipedia=InetAddress.getByName("pl.wikipedia.org");

        login();
        writeLog("","","Wystartowany");
        processTopics("Podstawowe zagadnienia z zakresu matematyki");
        //processTopics("Podstawowe zagadnienia z zakresu muzyki");
        processTopics("Podstawowe zagadnienia z zakresu biologii");
        processTopics("Podstawowe zagadnienia z zakresu architektury");
        processTopics("Podstawowe zagadnienia z zakresu astronomii");
        processTopics("Podstawowe zagadnienia z zakresu geografii");
        processTopics("Podstawowe zagadnienia z zakresu ekologii");
        processTopics("Podstawowe zagadnienia z zakresu psychologii");
        processTopics("Podstawowe zagadnienia z zakresu filozofii");
    }
}