-

   rss_rss_hh_new

 - e-mail

 

 -

 LiveInternet.ru:
: 17.03.2011
:
:
: 51

:


grab'

, 31 2017 . 09:42 +

, , . , :


2/3 , .


.


grabber.


QWebEngineView, . :


Sharing QNetworkAccessManager QWebEngineView


Qt QWebEngineCookieStore
QNetworkCookieJar


/**
 * Builds the main window, shares the web view's cookies with the
 * application's own network manager and opens the start page.
 *
 * @param parent  optional parent widget, forwarded to QMainWindow.
 */
MainWindow::MainWindow(QWidget *parent)
    : QMainWindow(parent)
    , m_ui(new Ui::MainWindow)
    , m_store(nullptr)
    , m_cookieJar(new QNetworkCookieJar(this))
    , m_networmManager(new QNetworkAccessManager(this))
    , m_try(0)
    , m_currentPage(0)
    , m_capches(1)
{
    m_ui->setupUi(this);

    // Requests issued through the manager use the jar that
    // handleCookieAdded() fills; every finished reply goes to handleImage().
    m_networmManager->setCookieJar(m_cookieJar);
    connect(m_networmManager, &QNetworkAccessManager::finished,
            this, &MainWindow::handleImage);

    // Forward each cookie the web view's profile reports into that jar.
    auto *const profile = m_ui->webView->page()->profile();
    m_store = profile->cookieStore();
    Q_ASSERT(m_store != nullptr);
    connect(m_store, &QWebEngineCookieStore::cookieAdded,
            this, &MainWindow::handleCookieAdded);
    m_store->loadAllCookies();

    m_ui->webView->load(QUrl("https://www.litres.ru/"));
}

/**
 * Slot connected to QWebEngineCookieStore::cookieAdded in the constructor.
 * Copies the reported cookie into m_cookieJar, the jar installed on the
 * QNetworkAccessManager, so direct requests carry the web view's cookies.
 *
 * @param cookie  cookie reported by the web engine cookie store.
 */
void MainWindow::handleCookieAdded(const QNetworkCookie &cookie)
{
    m_cookieJar->insertCookie(cookie);
}

Grab, url :


https://www.litres.ru/static/or3/view/or.html?art_type=4&file=26599915&bname= -  ReactJS&cover=%2Fstatic%2Fbookimages%2F26%2F59%2F99%2F26599923.bin.dir%2F26599923.cover.jpg&art=22880082&user=-&uuid=-

id :


/**
 * Starts a grab for the page currently shown in the web view.
 *
 * Does nothing when parseUrl() rejects the current URL. Otherwise the
 * document is written into the first standard download location; if the
 * platform reports none, a warning is logged and nothing is downloaded.
 */
void MainWindow::onGrabButtonClicked()
{
    const bool urlAccepted = parseUrl(m_ui->webView->url());
    if (urlAccepted) {
        const auto downloadDirs =
            QStandardPaths::standardLocations(QStandardPaths::DownloadLocation);
        if (!downloadDirs.isEmpty()) {
            downloadTo(downloadDirs.first());
        } else {
            qWarning()<<"There is no standard path to download";
        }
    }
}

bool MainWindow::parseUrl(const QUrl &url)
{
    const auto query = QUrlQuery(url.query(QUrl::FullyDecoded));
    if (query.isEmpty()){
        return false;
    }

    static const QVector fields = {
        "file", "bname", "uuid"
    };

    for (const auto& key: fields) {
        if (!query.hasQueryItem(key)) {
            qWarning()<<"Query hasn't param"<< key;
            return false;
        }
    }

    m_name = query.queryItemValue("bname", QUrl::FullyDecoded);
    m_file = query.queryItemValue("file");
    m_format = "jpg";

    return true;
}

MainWindow::downloadTo sets up the QPdfWriter and the QPainter:


void MainWindow::downloadTo(const QString &path)
{
    QDir dir(path);

    m_writer = std::make_unique(dir.absoluteFilePath(m_name+".pdf"));
    QPageLayout layout(QPageSize(QPageSize::A4), QPageLayout::Portrait,
                       QMarginsF(0,0,0,0));

    m_writer->setPageLayout(layout);
    m_writer->setResolution(96);
    m_writer->setTitle(m_name);
    m_painter = std::make_unique();
    m_painter->begin(m_writer.get());

    nextImage();
}


url :


https://www.litres.ru/pages/read_book_online/?file=26599915&page=2&rt=w1280&ft=gif

rt - image size preset, e.g. w640 or w1280
ft - image format: gif or jpg
page - page number (starting at 0)
file - book file id

jpg , gif .
url: https://www.litres.ru/pages/read_book_online/?file=26599915&page=0&rt=w1280&ft=gif , https://www.litres.ru/pages/read_book_online/?file=26599915&page=0&rt=w1280&ft=jpg


:


void MainWindow::nextImage()
{
    QUrlQuery query;
    query.addQueryItem("file", m_file);
    query.addQueryItem("rt", "w640");
    query.addQueryItem("ft", m_format);
    query.addQueryItem("page", QString::number(m_currentPage));

    QUrl url(BasePath);
    url.setQuery(query);
    m_networmManager->get(QNetworkRequest(url));
    ++m_currentPage;
}

/**
 * Handles a finished page request: draws the image into the PDF and asks
 * for the next page.
 *
 * On a network error the same page is requested again with the other image
 * format ("gif" <-> "jpg"). After three failed attempts in a row the
 * document is closed and the page/retry counters are reset. A reply that
 * cannot be decoded as an image causes the same page to be requested again.
 *
 * @param reply  finished reply; scheduled for deletion here.
 */
void MainWindow::handleImage(QNetworkReply *reply)
{
    constexpr int maxTries = 3;

    reply->deleteLater();

    if (!m_painter || !m_writer) {
        // No document is open, so there is nothing to draw into.
        return;
    }

    if (reply->error() != QNetworkReply::NoError) {
        qWarning()<<"Network error"<<reply->errorString();
        if (m_try >= maxTries) {
            // Give up: close the document and leave the counters ready for
            // the next grab.
            m_painter->end();
            m_painter.reset();
            m_writer.reset();
            m_try = 0;
            m_currentPage = 0;
            return;
        }

        // Retry the same page in the other format.
        if (m_format == "gif") {
            m_format = "jpg";
        } else {
            m_format = "gif";
        }
        --m_currentPage;   // nextImage() has already advanced the counter
        ++m_try;
        nextImage();
        return;
    }
    m_try = 0;

    qDebug()<<"Write page"<<reply->url();
    const char *imageFormat = (m_format == "jpg") ? "JPEG" : "GIF";
    const auto data = reply->readAll();
    const auto source = QImage::fromData(data, imageFormat);
    if (source.isNull()) {
        // NOTE(review): the payload is not an image in the requested format
        // (presumably the captcha page, see the disabled call below). The
        // page is re-requested with no limit and no delay - confirm this is
        // intended.
        //handleCapcha(data, reply->url());
        --m_currentPage;
        nextImage();
        return;
    }

    m_ui->pages->setText(QString::number(m_currentPage));
    const auto dest = source.scaledToWidth(m_writer->width()/*, Qt::SmoothTransformation */);
    m_painter->drawImage(QPoint(0,0), dest);
    // NOTE(review): a new page is started after every drawn image, so the
    // finished document ends with one empty page.
    m_writer->newPage();

    nextImage();
}


, .


. , , . , , .

, . , :


void MainWindow::handleCapcha(const QByteArray &page, const QUrl &url )
{
    ++m_capches;
    m_ui->webView->page()->setHtml(page, url);
    m_ui->captches->setText(QString::number(m_capches));
    QEventLoop loop;
    constexpr int duration = 1000*60*5;
    QTimer::singleShot(duration, &loop, &QEventLoop::quit);
    loop.exec();
}

WebView . , .



256 PDF A4 DPI 96 51,7 5,8 .
GitHubGist

Original source: habrahabr.ru (comments, light).

https://habrahabr.ru/post/334412/

:  

: [1] []
 

:
: 

: ( )

:

  URL