-

   rss_rss_hh_new

 - e-mail

 

 -

 LiveInternet.ru:
: 17.03.2011
:
:
: 51

:


[ ] Wikipedia

, 18 2017 . 17:41 +
fonkost 17:41

Wikipedia

, Selenium Webdriver , Wikipedia, (, ).

, , , .
Java, Selenium Webdriver Chrome. Chrome, , , . , PhantomJs, . Chrome.

, , https://ru.wikipedia.org/wiki/ :

/**
 * Class-level fixture: obtains a driver from {@code DriverHelper.getDriver()} and
 * stores it in the shared {@code driver} field used by the tests.
 */
@BeforeClass
public static void Start() {
    driver = DriverHelper.getDriver();
}

/**
 * Smoke test for the driver: opens a Wikipedia article and compares the page title
 * with the expected literal.
 */
@Test
public void testGetDriver() {
    String articleUrl = "https://ru.wikipedia.org/wiki/%D0%A0%D1%8E%D1%80%D0%B8%D0%BA";
    driver.navigate().to(articleUrl);

    // NOTE(review): the expected title looks like it lost its non-ASCII text - confirm.
    String actualTitle = driver.getTitle();
    assertTrue(actualTitle.equals("  "));
}

/**
 * Class-level teardown: quits the shared {@code driver}.
 */
@AfterClass
public static void Stop() {
    driver.quit();
}

The DriverHelper class provides the getDriver() method, which creates and configures the browser driver:

public final class DriverHelper{
    private static final int TIMEOUT = 30;

    public static WebDriver getDriver() {
        WebDriver driver = new ChromeDriver();
        driver.manage().window().maximize();
        driver.manage().timeouts().implicitlyWait(TIMEOUT, TimeUnit.SECONDS);
        return driver;
    }
}

, , .

Person


Person, , Wikipedia PersonPage.

Person name url. name , , , , .. , , .

Url Wikipedia, .
, :

@Test
public void testGetPerson() throws Exception {
    PersonPage page = new PersonPage(driver);
    Person person = page.getPerson("https://ru.wikipedia.org/wiki/_");
    assertTrue(person.getName().equals(" "));
    assertTrue(person.getUrl().equals(
        "https://ru.wikipedia.org/wiki/
        %D0%92%D0%BB%D0%B0%D0%B4%D0%B8%D0%BC%D0%B8%D1%80_
        %D0%90%D0%BB%D0%B5%D0%BA%D1%81%D0%B0%D0%BD%D0%B4%D1%80%D0%BE%D0%B2%D0%B8%D1%87"));
}

testGetPerson() . PersonPage, . Url url , firstHeading. getPerson():

/**
 * Opens {@code url} in the browser and builds a {@code Person} from the loaded page.
 *
 * <p>The person is created from the driver's current URL after navigation,
 * not from the {@code url} argument, and is named after the page heading.
 *
 * @param url address of the page to open
 * @return a person carrying the page's current URL and heading text
 * @throws MalformedURLException declared by the {@code Person} constructor and {@code getName()}
 */
public Person getPerson(String url) throws MalformedURLException {
    driver.navigate().to(url);

    // Read the heading first, then the address the browser actually ended up on.
    String heading = getName();
    Person loaded = new Person(driver.getCurrentUrl());
    loaded.setName(heading);
    return loaded;
}

/**
 * Reads the text of the {@code #firstHeading} element of the currently loaded page.
 *
 * @return the heading text
 */
private String getName() throws MalformedURLException {
    return driver.findElement(By.cssSelector("#firstHeading")).getText();
}

.
, url, : , Wikipedia , . , , , , . , .

: https://ru.wikipedia.org/wiki/_ https://ru.wikipedia.org/wiki/__, https://ru.wikipedia.org/wiki/_ https://ru.wikipedia.org/wiki/__


, Wikipedia.
( ):

@Test
public void testGetChildrenUrl() throws Exception {
    driver.navigate().to("https://ru.wikipedia.org/wiki/");
    PersonPage page = new PersonPage(driver);
    List children = page.getChildrenUrl();
    assertTrue(children.size() == 1);
    Person person = children.get(0);
    assertTrue(person.getUrl().equals("https://ru.wikipedia.org/wiki/
        %D0%98%D0%B3%D0%BE%D1%80%D1%8C_
        %D0%A0%D1%8E%D1%80%D0%B8%D0%BA%D0%BE%D0%B2%D0%B8%D1%87"));
}

, PersonPage :

/**
 * Collects the persons linked from the matching infobox row of the current page.
 *
 * <p>Every {@code <a>} inside the infobox table row whose header equals the
 * expected label becomes a {@code Person} built from the link's {@code href}.
 *
 * @return one person per matching link, in document order; empty if none match
 * @throws MalformedURLException declared by the {@code Person} constructor
 */
public List<Person> getChildrenUrl() throws MalformedURLException {
    // NOTE(review): the header label in the XPath (".=':'") looks like it lost
    // its non-ASCII text - confirm the intended label.
    List<WebElement> childrenLinks = driver.findElements(
        By.xpath("//table[contains(@class, 'infobox')]//tr[th[.=':']]//a"));
    // Typed lists: iterating a raw List as WebElement does not compile.
    List<Person> children = new ArrayList<>(childrenLinks.size());
    for (WebElement link : childrenLinks) {
        children.add(new Person(link.getAttribute("href")));
    }
    return children;
}

, Wikipedia . , ( ). , , , , ( ).

, .
, , ( ) , 16 , Wikipedia , 5 .

/**
 * Opens several pages in turn and checks how many children
 * {@code PersonPage.getChildrenUrl()} reports for each of them.
 *
 * NOTE(review): the article names in the URLs appear to have lost their
 * non-ASCII text, so the pages under test cannot be identified from here.
 */
@Test
public void testChildrenSize() throws Exception {
    // First page: exactly one child expected.
    driver.navigate().to("https://ru.wikipedia.org/wiki/");
    PersonPage page = new PersonPage(driver);
    List children = page.getChildrenUrl();
    assertTrue(children.size() == 1);

    // Second page: sixteen children expected. The same page object is reused;
    // it reads whatever the driver currently shows.
    driver.navigate().to("https://ru.wikipedia.org/wiki/_");
    children = page.getChildrenUrl();
    assertTrue(children.size() == 16);

    // Third page: no children expected.
    driver.navigate().to("https://ru.wikipedia.org/wiki/__(_)");
    children = page.getChildrenUrl();
    assertTrue(children.size() == 0);

    // Fourth page: no children expected.
    driver.navigate().to("https://ru.wikipedia.org/wiki/_");
    children = page.getChildrenUrl();
    assertTrue(children.size() == 0);
}

Person (int id) (List children), .
. , .

/**
 * Records {@code childId} as a child of this person unless it is already recorded.
 *
 * @param childId id of the child person
 */
public void setChild(int childId) {
    if (children.contains(childId)) {
        return; // already known, keep the list free of duplicates
    }
    children.add(childId);
}

, .


. GenerateGenealogicalTree main.

, , . , . , . , (+ ) . , , . . . , .

. , , ( , . : , I).

, , .. .

:

  1. ,
  2. . .
  3. , . .
  4. , .
  5. , .

:

/**
 * Command-line entry point: crawls person pages starting from one URL, builds a
 * {@code GenealogicalTree} from them and hands the result over for saving.
 */
public final class GenerateGenealogicalTree {
    /**
     * Resolves the start URL from {@code args}, builds the tree and saves it.
     *
     * @param args command-line arguments, interpreted by {@code getUrl}
     */
    public static void main(String[] args) throws Exception {
        String url = getUrl(args);
        GenealogicalTree tree = getGenealogicalTreeByUrl(url);
        saveResultAndQuit(tree);
    }

    /**
     * Builds the tree by visiting persons one at a time until none is left unvisited.
     *
     * <p>For every unvisited person the page is loaded, the person's data is
     * stored in the tree and, unless the tree reports that the person was
     * deleted by that step, the person's children are added to the tree.
     *
     * @param url address of the root person's page
     * @return the completed tree
     * @throws MalformedURLException if a person URL cannot be parsed
     */
    public static GenealogicalTree getGenealogicalTreeByUrl(String url) throws MalformedURLException {
        WebDriver driver = DriverHelper.getDriver();
        // Quit the browser on every exit path: a failure in the middle of the
        // crawl would otherwise leave the browser session running.
        try {
            Person person = new Person(url);
            GenealogicalTree tree = new GenealogicalTree(person);
            PersonPage page = new PersonPage(driver);
            while (tree.hasUnvisitingPerson()) {
                String currentUrl = tree.getCurrentUrl();
                Person currentPerson = page.getPerson(currentUrl);
                tree.setCurrentPerson(currentPerson);
                if (!tree.isCurrentPersonDeleted()) {
                    List<Person> children = page.getChildrenUrl();
                    tree.setChildren(children);
                }
                tree.updatingCurrentPerson();
            }
            return tree;
        } finally {
            driver.quit();
        }
    }
}

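GenealogicalTree has three fields: List allPersons is the list of all persons in the tree, int indexCurrentUnvisitedPerson is the index in allPersons of the person currently being visited, and boolean isCurrentPersonDeleted tells whether the current person has been deleted (i.e. it turned out to be a duplicate).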

/**
 * State of the genealogical tree while it is being built.
 */
public final class GenealogicalTree {
    // Every person known to the tree.
    // NOTE(review): declared as a raw List; other snippets call Person methods on
    // its elements, which needs List<Person> to compile - confirm the intended type.
    private List allPersons;
    // Index in allPersons of the person currently being processed.
    private int indexCurrentUnvisitedPerson;
    // Set while the current person has just been removed from allPersons.
    private boolean isCurrentPersonDeleted;
}

, :

/**
 * Creates a tree whose only, not yet visited, entry is {@code person}.
 *
 * @param person the root person; must not be {@code null}
 * @throws IllegalArgumentException if {@code person} is {@code null}
 */
public GenealogicalTree(Person person) {
    if (person == null) {
        // NOTE(review): the message text appears to have been lost - confirm.
        throw new IllegalArgumentException("   ");
    }
    isCurrentPersonDeleted = false;
    indexCurrentUnvisitedPerson = 0;
    allPersons = new ArrayList();
    allPersons.add(person);
}

. .

, : , , .

/**
 * Tells whether the current index still points at an entry of {@code allPersons}.
 *
 * @return {@code true} while at least one person has not been visited yet
 */
public boolean hasUnvisitingPerson() {
    int knownPersons = allPersons.size();
    return indexCurrentUnvisitedPerson < knownPersons;
}

url- url :

/**
 * Returns the URL of the person at {@code indexCurrentUnvisitedPerson}.
 *
 * @return the URL of the person that is to be visited next
 */
public String getCurrentUrl() {
    return allPersons.get(indexCurrentUnvisitedPerson).getUrl();
}

setCurrentPerson .

url, . , . url-. setCurrentPerson , .

( , url- ), . , . , . .

, . . , url url, , . , , . , url-, .

/**
 * Stores the data read from the current person's page, or drops the current
 * person when the page turns out to belong to a person that precedes it in the list.
 *
 * <p>{@code currentPerson} is looked up in {@code allPersons}. If it is found at
 * an index before the current one, the current entry is removed in favour of
 * that earlier entry. Otherwise the main data of {@code currentPerson} is
 * copied into the current entry.
 *
 * @param currentPerson the person built from the page that was just loaded
 */
public void setCurrentPerson(Person currentPerson) {
    int duplicateIndex = allPersons.indexOf(currentPerson);
    boolean foundBeforeCurrent =
        duplicateIndex >= 0 && duplicateIndex < indexCurrentUnvisitedPerson;

    if (!foundBeforeCurrent) {
        allPersons.get(indexCurrentUnvisitedPerson).copyMainData(currentPerson);
        isCurrentPersonDeleted = false;
        return;
    }
    removePerson(duplicateIndex);
}

For indexOf(Object object) to work correctly, the Person class must override equals(Object object) and hashCode():

/**
 * Two persons are equal when their URLs are equal.
 *
 * @param object the object to compare with
 * @return {@code true} if {@code object} is a {@code Person} with an equal URL
 */
@Override
public boolean equals(Object object) {
    // instanceof is false for null, so it also covers the null case.
    if (object instanceof Person) {
        Person other = (Person) object;
        return this.url.equals(other.url);
    }
    return false;
}

/**
 * Hash code derived from the URL only, matching {@code equals}.
 */
@Override
public int hashCode() {
    return this.url.hashCode();
}

?
:

  1. . , , , ,
  2. . : 8- ( ).
  3. : , , , . , Wikipedia

, , .. , ( ).

, . . , , , , . , , .

( ).

.

/**
 * Removes the current person as a duplicate of the person at {@code indexDuplicate}.
 *
 * <p>In every person that precedes the current one, child references to the
 * removed person's id are replaced with the duplicate's id. The current entry
 * is then dropped from {@code allPersons} and the deleted flag is raised.
 *
 * @param indexDuplicate index in {@code allPersons} of the person that is kept
 */
private void removePerson(int indexDuplicate) {
    int removedId = allPersons.get(indexCurrentUnvisitedPerson).getId();
    int keptId = allPersons.get(indexDuplicate).getId();

    // Only the entries before the current index are rewired.
    for (Person earlier : allPersons.subList(0, indexCurrentUnvisitedPerson)) {
        earlier.replaceChild(removedId, keptId);
    }

    allPersons.remove(indexCurrentUnvisitedPerson);
    isCurrentPersonDeleted = true;
}

Person :

/**
 * Replaces the child id {@code oldId} with {@code newId}.
 *
 * <p>Does nothing when both ids are the same or when {@code oldId} is not a
 * child of this person. The new id is added through {@code setChild}.
 *
 * @param oldId child id to remove
 * @param newId child id to record instead
 */
public void replaceChild(int oldId, int newId) {
    boolean nothingToReplace = (oldId == newId) || !children.contains(oldId);
    if (nothingToReplace) {
        return;
    }
    // Box explicitly so that remove(Object) is chosen rather than remove(int index).
    children.remove(Integer.valueOf(oldId));
    setChild(newId);
}

.

, .
, , , .. .
, , .. .

, , . , . , , , , .

setChildren() .

/**
 * Registers {@code children} as children of the current person.
 *
 * <p>A child that is already present in {@code allPersons} (by {@code equals})
 * is reused; an unknown child is appended to {@code allPersons}. In both cases
 * the child's id is recorded on the current person.
 *
 * @param children persons found on the current person's page
 * @throws IllegalArgumentException if the current person has just been deleted
 */
public void setChildren(List<Person> children) {
    if (isCurrentPersonDeleted) {
        // NOTE(review): the message text appears to have been lost - confirm.
        throw new IllegalArgumentException(
            "    .    ");
    }

    // Typed parameter: iterating a raw List as Person does not compile.
    for (Person person : children) {
        int index = allPersons.indexOf(person);
        int id;
        if (index >= 0) {
            // Already known: reference the stored entry.
            id = allPersons.get(index).getId();
        } else {
            // New person: append it so that it is visited later.
            allPersons.add(person);
            id = person.getId();
        }
        allPersons.get(indexCurrentUnvisitedPerson).setChild(id);
    }
}

, . : , , . , .

/**
 * Moves on to the next person to visit.
 *
 * <p>If the current person was deleted, the index is left alone and only the
 * deleted flag is cleared. Otherwise the index advances by one.
 */
public void updatingCurrentPerson() {
    if (!isCurrentPersonDeleted) {
        indexCurrentUnvisitedPerson++;
        return;
    }
    isCurrentPersonDeleted = false;
}

: (0- ), (1- ) (, Wikipedia), (2- ) ( , 2- , ), (3- ) .

, 100%, , , . javadoc.

: GenealogicalTree , ( GenerateGenealogicalTree). GenerateGenealogicalTree. .
.


, , - , . , 17 2017 Wikipedia 3448 . .

, genealogicaltree. root . MySQL JDBC Type 4 driver.

:

/**
 * Saves a crawled genealogical tree into the {@code genealogicaltree} MySQL database.
 *
 * <p>Each call creates a new table and writes one row per {@code Person}.
 */
public class MySqlHelper {
    private static final String url = "jdbc:mysql://localhost:3306/genealogicaltree"
        + "?serverTimezone=UTC&useUnicode=yes&characterEncoding=UTF-8";
    private static final String user = "root";
    private static final String password = "";

    /**
     * Creates the table {@code tableName} and inserts every person of {@code tree} into it.
     *
     * <p>Values are bound through a prepared statement, so names or URLs that
     * contain quotes are stored as-is instead of breaking the statement.
     *
     * @param tableName name of the table to create; letters, digits and
     *                  underscores only, not starting with a digit
     * @param tree      persons to store
     * @throws IllegalArgumentException if {@code tableName} is not a plain identifier
     * @throws IllegalStateException    if the database reports an error; the
     *                                  {@link SQLException} is kept as the cause
     */
    public static void saveTree(String tableName, List<Person> tree) throws MalformedURLException {
        requirePlainIdentifier(tableName);
        // try-with-resources closes statements before the connection and copes
        // with a connection that could not be opened at all.
        try (Connection connection = DriverManager.getConnection(url, user, password)) {
            try (Statement statement = connection.createStatement()) {
                statement.executeUpdate(createTable(tableName));
            }
            try (java.sql.PreparedStatement insert =
                     connection.prepareStatement(insertPerson(tableName))) {
                for (Person person : tree) {
                    insert.setInt(1, person.getId());
                    insert.setString(2, person.getName());
                    insert.setString(3, person.getUrl());
                    insert.setString(4, String.valueOf(person.getChildren()));
                    insert.executeUpdate();
                }
            }
        } catch (SQLException sqlEx) {
            throw new IllegalStateException(
                "Failed to save the tree into table " + tableName, sqlEx);
        }
    }

    /**
     * Rejects table names that are not plain identifiers. Table names cannot be
     * bound as statement parameters, so they are validated before being
     * concatenated into SQL.
     */
    private static void requirePlainIdentifier(String tableName) {
        if (tableName == null || !tableName.matches("[A-Za-z_][A-Za-z0-9_]*")) {
            throw new IllegalArgumentException("Invalid table name: " + tableName);
        }
    }

    /** Builds the CREATE TABLE statement for {@code tableName}. */
    private static String createTable(String tableName) {
        StringBuilder sql = new StringBuilder();
        sql.append("CREATE TABLE ").append(tableName).append(" (");
        sql.append("id INTEGER not NULL, ");
        sql.append("name VARCHAR(255), ");
        sql.append("url VARCHAR(2048), ");
        sql.append("children VARCHAR(255), ");
        sql.append("PRIMARY KEY ( id ))");
        return sql.toString();
    }

    /**
     * Builds the parameterized INSERT statement for {@code tableName}.
     *
     * <p>The column list matches the columns created by {@code createTable} and
     * the four values bound in {@code saveTree}.
     */
    private static String insertPerson(String tableName) {
        return "INSERT INTO genealogicaltree." + tableName
            + " (id, name, url, children) VALUES (?, ?, ?, ?)";
    }
}

:

/**
 * Saves the tree into a table named {@code "generate"} followed by the current
 * time in milliseconds.
 *
 * @param tree the tree to save
 */
private static void saveResultAndQuit(GenealogicalTree tree) throws Exception {
    long createdAtMillis = new Timestamp(System.currentTimeMillis()).getTime();
    MySqlHelper.saveTree("generate" + createdAtMillis, tree.getGenealogicalTree());
}


GenerateGenealogicalTree.main() , .

:

  1. (, 1153 )
  2. : , VII
  3. , VII
  4. , , , IV
  5. . , ,

getChildrenUrl() , . 1 , , . 2 , , extiw. 3-4 , , sup ( ). 5 , new ( ).

testChildrenSize(), :

// Additional checks for testChildrenSize(); they reuse its `page` and `children` locals.
// NOTE(review): the article names in the URLs appear to have lost their
// non-ASCII text, so the pages under test cannot be identified from here.

// Three children expected.
driver.navigate().to("https://ru.wikipedia.org/wiki/_");
children = page.getChildrenUrl();
assertTrue(children.size() == 3);

// Five children expected.
driver.navigate().to("https://ru.wikipedia.org/wiki/_VII");
children = page.getChildrenUrl();
assertTrue(children.size() == 5);

// No children expected.
driver.navigate().to("https://ru.wikipedia.org/wiki/_IV__,___̸");
children = page.getChildrenUrl();
assertTrue(children.size() == 0);

// Five children expected.
driver.navigate().to("https://ru.wikipedia.org/wiki/__(_)");
children = page.getChildrenUrl();
assertTrue(children.size() == 5);

.

getChildrenUrl():

/**
 * Collects the persons linked from the matching infobox row of the current page.
 *
 * <p>Links whose parent element is a {@code <sup>} are skipped. The link text
 * is stored as the person's name-from-URL, and the person is kept only if
 * {@code Person.isCorrectNameUrl()} accepts that text.
 *
 * @return the accepted persons, in document order; empty if there are none
 * @throws MalformedURLException declared by the {@code Person} constructor
 */
public List<Person> getChildrenUrl() throws MalformedURLException {
    waitLoadPage();
    // Typed lists: iterating a raw List as WebElement does not compile.
    List<WebElement> childrenLinks = getChildrenLinks();
    List<Person> children = new ArrayList<>();
    for (WebElement link : childrenLinks) {
        if (DriverHelper.isSup(link)) {
            continue;
        }
        Person person = new Person(link.getAttribute("href"));
        person.setNameUrl(link.getText());
        if (person.isCorrectNameUrl()) {
            children.add(person);
        }
    }
    return children;
}

/**
 * Finds the links in the matching infobox row, excluding links whose class is
 * exactly {@code new} or {@code extiw}.
 *
 * @return the matching link elements, as returned by {@code DriverHelper.getElements}
 */
private List getChildrenLinks() {
    // NOTE(review): the header label (".=':'") looks like it lost its
    // non-ASCII text - confirm the intended label.
    By childLinks = By.xpath("//table[contains(@class, 'infobox')]//tr[th[.=':']]" +
            "//a[not(@class='new' or @class='extiw')]");
    return DriverHelper.getElements(driver, childLinks);
}

/**
 * Looks up the {@code #firstHeading} element and discards the result.
 * Presumably this relies on the driver's implicit wait to block until the page
 * has rendered its heading - confirm against the driver configuration.
 */
private void waitLoadPage() {
    this.driver.findElement(By.cssSelector("#firstHeading"));
}

public final class DriverHelper {
    /**
     * Finds all elements matching {@code by} with the implicit wait switched off.
     *
     * <p>The implicit wait is set to zero for the lookup and restored to
     * {@code TIMEOUT} seconds afterwards, even if the lookup throws.
     *
     * @param driver driver to search with
     * @param by     locator of the elements
     * @return the matching elements; empty if there are none
     */
    public static List<WebElement> getElements(WebDriver driver, By by) {
        driver.manage().timeouts().implicitlyWait(0, TimeUnit.SECONDS);
        try {
            return driver.findElements(by);
        } finally {
            // Restore the wait on every path so that a failed lookup does not
            // leave the driver with a zero timeout.
            driver.manage().timeouts().implicitlyWait(DriverHelper.TIMEOUT, TimeUnit.SECONDS);
        }
    }

    /**
     * Tells whether the parent of {@code element} is a {@code <sup>} tag.
     *
     * @param element element whose parent is inspected
     * @return {@code true} if the parent's tag name is {@code sup}
     */
    public static boolean isSup(WebElement element) {
        String parentTagName = element.findElement(By.xpath(".//..")).getTagName();
        return parentTagName.equals("sup");
    }
}

public class Person {
    /** Accepts text of at least two characters whose first character is not a digit. */
    private static final Pattern CORRECT_NAME_URL = Pattern.compile("^[\\D]+.+");

    private String nameUrl;

    /**
     * Checks the stored link text against {@link #CORRECT_NAME_URL}.
     *
     * @return {@code true} if the text matches; {@code false} if it does not
     *         match or has not been set
     */
    public boolean isCorrectNameUrl() {
        if (nameUrl == null) {
            return false;
        }
        Matcher m = CORRECT_NAME_URL.matcher(nameUrl);
        return m.matches();
    }
}

nameUrl , .
.

, Wikipedia, Ը . , .


383 ( , , , 18 II), , , , , II, . , , .

. :

  1. -
  2. , V, I - Ը

, , , , .

, , , .. ( , , -, )
getChildrenUrl(), , url . , , .. , .

/**
 * Variant of getChildrenUrl() that reports no children when the current URL
 * has a fragment (the part after {@code #}).
 *
 * NOTE(review): DriverHelper.hasAnchor declares MalformedURLException, but
 * this signature has no throws clause - confirm against the full method.
 */
public List getChildrenUrl() {
    waitLoadPage();
    // A URL with a fragment yields an empty list.
    if (DriverHelper.hasAnchor(driver)) {
        return new ArrayList();
    }
    ...
}

public final class DriverHelper {
    ...
    /**
     * Tells whether the driver's current URL contains a fragment (the part after {@code #}).
     *
     * @param driver driver whose current URL is inspected
     * @return {@code true} if the URL has a fragment
     * @throws MalformedURLException if the current URL cannot be parsed
     */
    public static boolean hasAnchor(WebDriver driver) throws MalformedURLException {
        String fragment = new URL(driver.getCurrentUrl()).getRef();
        return fragment != null;
    }
    ...
}

, , , , :

/**
 * Checks that a page reports its children when opened by its plain URL and
 * reports none when the URL carries a fragment.
 *
 * NOTE(review): the article names in the URLs appear to have lost their
 * non-ASCII text - confirm the pages under test.
 */
@Test
public void testEmptyChildrenInPersonWithAnchor() throws Exception {
    // Plain URL: five children expected.
    driver.navigate().to("https://ru.wikipedia.org/wiki/_");
    PersonPage page = new PersonPage(driver);
    List children = page.getChildrenUrl();
    assertTrue(children.size() == 5);

    // URL with a fragment: no children expected.
    driver.navigate().to(
        "https://ru.wikipedia.org/wiki/_#.D0.A1.D0.B5.D0.BC.D1.8C.D1.8F");
    children = page.getChildrenUrl();
    assertTrue(children.size() == 0);
}

, , .

, ? , , , : , , . .


: , .
getName():

/**
 * Determines the name to use for the person shown on the current page.
 *
 * <p>Without a URL fragment the page heading ({@code #firstHeading}) is the
 * name. With a fragment, the text of the element whose id equals the fragment
 * is used, falling back to the heading when no such element exists or its
 * trimmed text is empty.
 *
 * @return the person's name
 * @throws MalformedURLException if the current URL cannot be parsed
 */
private String getName() throws MalformedURLException {
    waitLoadPage();
    String namePage = driver.findElement(By.cssSelector("#firstHeading")).getText();

    // A null fragment means the URL has no anchor; the URL is parsed only once.
    String anchor = DriverHelper.getAnchor(driver);
    if (anchor == null) {
        return namePage;
    }

    // Typed list: with a raw List, get(0) yields Object and getText() does not compile.
    List<WebElement> anchored = DriverHelper.getElements(driver, By.id(anchor));
    if (anchored.isEmpty()) {
        return namePage;
    }

    String name = anchored.get(0).getText().trim();
    return name.isEmpty() ? namePage : name;
}

public final class DriverHelper {
    ...
    /**
     * Returns the fragment (the part after {@code #}) of the driver's current URL.
     *
     * @param driver driver whose current URL is inspected
     * @return the fragment, or {@code null} if the URL has none
     * @throws MalformedURLException if the current URL cannot be parsed
     */
    public static String getAnchor(WebDriver driver) throws MalformedURLException {
        String currentAddress = driver.getCurrentUrl();
        return new URL(currentAddress).getRef();
    }
    ...
}

url , , . , I. , .

, . , , , , .
.

,


: , , . ?!

, .. getChildrenUrl(). nameUrl , .

, .

, , . , , , nameUrl ( "" ).

, .

:
id name children url urlName
8 []
9 []
10 []
15 []
23 () []
26 []
28 []
29 []
36 I []
133 []
360 [] -

. insert II ( ) - , nameUrl II '. setName setNameUrl Person, .

, Wikipedia . , , . , (.. ). , .

, Person ( ):

private List parents = new ArrayList();
private int numberGeneration = 0;

/**
 * Records {@code parent} as a parent of this person unless it is already recorded.
 *
 * <p>Mirrors {@code setChild}: a page that links the same child more than once
 * must not add the same parent id repeatedly.
 *
 * @param parent id of the parent person
 */
public void setParent(int parent) {
    if (!parents.contains(parent)) {
        parents.add(parent);
    }
}

/**
 * Sets the generation number, but only while it still has its initial value of zero.
 *
 * @param numberGeneration generation number to assign
 */
public void setNumberGeneration(int numberGeneration) {
    if (this.numberGeneration != 0) {
        return; // already assigned, keep the first value
    }
    this.numberGeneration = numberGeneration;
}

, , . , (, , , ). , , , , , , , , .

, , .

, . , , , . .. , , , , , .

The updated setChildren(List children) method of the GenealogicalTree class:

/**
 * Registers {@code children} as children of the current person and links each
 * child back to its parent.
 *
 * <p>A child already present in {@code allPersons} (by {@code equals}) only
 * receives the current person as an additional parent. An unknown child gets
 * the current person's generation number plus one, receives the current person
 * as parent and is appended to {@code allPersons}. In both cases the child's
 * id is recorded on the current person.
 *
 * @param children persons found on the current person's page
 * @throws IllegalArgumentException if the current person has just been deleted
 */
public void setChildren(List<Person> children) {
    if (isCurrentPersonDeleted) {
        // NOTE(review): the message text appears to have been lost - confirm.
        throw new IllegalArgumentException(
            "    .    ");
    }

    Person currentPerson = allPersons.get(indexCurrentUnvisitedPerson);
    int numberGeneration = currentPerson.getNumberGeneration();
    numberGeneration++;
    int idParent = currentPerson.getId();
    // Typed parameter: iterating a raw List as Person does not compile.
    for (Person person : children) {
        int index = allPersons.indexOf(person);
        int id;
        if (index >= 0) { // already in the tree: only add the parent link
            allPersons.get(index).setParent(idParent);
            id = allPersons.get(index).getId();
        } else { // new person: set generation and parent, then append
            person.setNumberGeneration(numberGeneration);
            person.setParent(idParent);
            allPersons.add(person);
            id = person.getId();
        }
        currentPerson.setChild(id);
    }
}

, , .


:



( 3452 ).

:

) Wikipedia
) . , , , II 29 .
) .

, , , II 28 . , , III II .

Original source: habrahabr.ru (comments, light).

https://habrahabr.ru/post/338190/

:  

: [1] []
 

:
: 

: ( )

:

  URL