grab' |
, , . , :
2/3 , .
.
grabber.
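The user signs in through a QWebEngineView, so its cookies have to be shared with the downloader. Two different cookie stores are involved:
Qt WebEngine keeps its cookies in a QWebEngineCookieStore,
while QNetworkAccessManager reads them from a QNetworkCookieJar.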
/// Creates the main window, wires the image downloader and the cookie
/// mirroring, and opens the LitRes start page in the embedded web view.
///
/// @param parent  Parent widget forwarded to QMainWindow.
MainWindow::MainWindow(QWidget *parent)
    : QMainWindow(parent)
    , m_ui(new Ui::MainWindow)
    , m_store(nullptr)
    , m_cookieJar(new QNetworkCookieJar (this))
    , m_networmManager(new QNetworkAccessManager(this))
    , m_try(0)
    , m_currentPage(0)
    , m_capches(1)
{
    m_ui->setupUi(this);

    // Downloader side: requests use m_cookieJar, every finished reply is
    // delivered to handleImage().
    m_networmManager->setCookieJar(m_cookieJar);
    connect(m_networmManager, &QNetworkAccessManager::finished,
            this, &MainWindow::handleImage);

    // Browser side: each cookie reported by the web view's profile is passed
    // to handleCookieAdded().
    m_store = m_ui->webView->page()->profile()->cookieStore();
    Q_ASSERT(m_store != nullptr);
    connect(m_store, &QWebEngineCookieStore::cookieAdded,
            this, &MainWindow::handleCookieAdded);
    m_store->loadAllCookies();

    m_ui->webView->load(QUrl("https://www.litres.ru/"));
}
void MainWindow::handleCookieAdded(const QNetworkCookie &cookie)
{
m_cookieJar->insertCookie(cookie);
}
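When the Grab button is clicked, the url of the page currently open in the web view is parsed. It looks like this: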
https://www.litres.ru/static/or3/view/or.html?art_type=4&file=26599915&bname= - ReactJS&cover=%2Fstatic%2Fbookimages%2F26%2F59%2F99%2F26599923.bin.dir%2F26599923.cover.jpg&art=22880082&user=-&uuid=-
The book file id and the book name are taken from it:
void MainWindow::onGrabButtonClicked()
{
if(!parseUrl(m_ui->webView->url()))
{
return;
}
const auto paths = QStandardPaths::standardLocations(QStandardPaths::DownloadLocation);
if (paths.isEmpty()) {
qWarning()<<"There is no standard path to download";
return;
}
downloadTo(*paths.begin());
}
bool MainWindow::parseUrl(const QUrl &url)
{
const auto query = QUrlQuery(url.query(QUrl::FullyDecoded));
if (query.isEmpty()){
return false;
}
static const QVector fields = {
"file", "bname", "uuid"
};
for (const auto& key: fields) {
if (!query.hasQueryItem(key)) {
qWarning()<<"Query hasn't param"<< key;
return false;
}
}
m_name = query.queryItemValue("bname", QUrl::FullyDecoded);
m_file = query.queryItemValue("file");
m_format = "jpg";
return true;
}
MainWindow::downloadTo creates the QPdfWriter and the QPainter that draws on it:
void MainWindow::downloadTo(const QString &path)
{
QDir dir(path);
m_writer = std::make_unique(dir.absoluteFilePath(m_name+".pdf"));
QPageLayout layout(QPageSize(QPageSize::A4), QPageLayout::Portrait,
QMarginsF(0,0,0,0));
m_writer->setPageLayout(layout);
m_writer->setResolution(96);
m_writer->setTitle(m_name);
m_painter = std::make_unique();
m_painter->begin(m_writer.get());
nextImage();
}
url :
https://www.litres.ru/pages/read_book_online/?file=26599915&page=2&rt=w1280&ft=gif
rt | image width, e.g. w640, w1280 |
ft | image format: gif or jpg |
page | page number, starting from 0 |
file | book file id |
jpg , gif .
url: https://www.litres.ru/pages/read_book_online/?file=26599915&page=0&rt=w1280&ft=gif
, https://www.litres.ru/pages/read_book_online/?file=26599915&page=0&rt=w1280&ft=jpg
:
void MainWindow::nextImage()
{
QUrlQuery query;
query.addQueryItem("file", m_file);
query.addQueryItem("rt", "w640");
query.addQueryItem("ft", m_format);
query.addQueryItem("page", QString::number(m_currentPage));
QUrl url(BasePath);
url.setQuery(query);
m_networmManager->get(QNetworkRequest(url));
++m_currentPage;
}
/// Handles a finished page-image request.
///
/// - Network error: the same page is requested again with the other image
///   format ("gif" <-> "jpg"). After the retry budget is used up the PDF is
///   finalized and the download state is reset.
/// - Payload that does not decode as an image: the same page is requested
///   again.
/// - Valid image: it is scaled to the page width, drawn onto the current PDF
///   page, a new page is started and the next image is requested.
///
/// @param reply  Finished reply; scheduled for deletion here.
void MainWindow::handleImage(QNetworkReply *reply)
{
    reply->deleteLater();

    // A reply may arrive when no document is open (e.g. after the download
    // was finalized); there is nothing to draw on then.
    if (!m_painter || !m_writer) {
        return;
    }

    if (reply->error() != QNetworkReply::NoError) {
        qWarning()<<"Network error"<<reply->errorString();

        constexpr int maxRetries = 3;
        if (m_try >= maxRetries) {
            // Give up: close the document and reset the counters so that the
            // next download starts from page 0 with a full retry budget.
            m_painter->end();
            m_painter.reset();
            m_writer.reset();
            m_try = 0;
            m_currentPage = 0;
            return;
        }

        // Retry the same page with the other image format.
        if (m_format == "gif") {
            m_format = "jpg";
        } else {
            m_format = "gif";
        }
        --m_currentPage;
        ++m_try;
        nextImage();
        return;
    }

    m_try = 0;
    qDebug()<<"Write page"<<reply->url();

    const char *formatHint = (m_format == "jpg") ? "JPEG" : "GIF";
    const auto data = reply->readAll();
    const auto source = QImage::fromData(data, formatHint);
    if (source.isNull()) {
        // NOTE(review): this retry is unbounded; if the server keeps
        // answering with a non-image payload the page is requested forever.
        //handleCapcha(data, reply->url());
        --m_currentPage;
        nextImage();
        return;
    }

    m_ui->pages->setText(QString::number(m_currentPage));
    const auto dest = source.scaledToWidth(m_writer->width()/*, Qt::SmoothTransformation */);
    m_painter->drawImage(QPoint(0,0), dest);
    m_writer->newPage();
    nextImage();
}
, .
. , , . , , .
, . , :
void MainWindow::handleCapcha(const QByteArray &page, const QUrl &url )
{
++m_capches;
m_ui->webView->page()->setHtml(page, url);
m_ui->captches->setText(QString::number(m_capches));
QEventLoop loop;
constexpr int duration = 1000*60*5;
QTimer::singleShot(duration, &loop, &QEventLoop::quit);
loop.exec();
}
WebView . , .
256 PDF A4 DPI 96 51,7 5,8 .
GitHubGist