Discussions
Could not scrape data from amazon.co.jp
over 5 years ago by Genki
I could not get the HTML data from amazon.co.jp when I tried yesterday.
The result of the scrape is shown below:
<!DOCTYPE html>
<!--[if lt IE 7]> <html lang="jp" class="a-no-js a-lt-ie9 a-lt-ie8 a-lt-ie7"> <![endif]-->
<!--[if IE 7]> <html lang="jp" class="a-no-js a-lt-ie9 a-lt-ie8"> <![endif]-->
<!--[if IE 8]> <html lang="jp" class="a-no-js a-lt-ie9"> <![endif]-->
<!--[if gt IE 8]><!-->
<html class="a-no-js" lang="jp"><!--<![endif]--><head>
<!-- Head of the captured response: encoding declarations, compatibility hint, title,
     viewport and one external stylesheet. -->
<!-- NOTE(review): the next two meta elements name different encodings (Shift_JIS and
     utf-8). Whoever decodes this response has to pick one, and picking the wrong one
     garbles every non-ASCII character in the body. -->
<meta http-equiv="content-type" content="text/html; charset=Shift_JIS">
<meta charset="utf-8">
<meta http-equiv="X-UA-Compatible" content="IE=edge,chrome=1">
<!-- The title identifies this document as Amazon's CAPTCHA page. -->
<title dir="ltr">Amazon CAPTCHA</title>
<meta name="viewport" content="width=device-width">
<link rel="stylesheet" href="https://images-na.ssl-images-amazon.com/images/G/01/AUIClients/AmazonUI-3c913031596ca78a3768f4e934b1cc02ce238101.secure.min._V1_.css">
<!-- Inline script: the guard (true === true) always passes, so the single var statement
     always runs and defines the globals ue_t0, ue_csm, ue, ue_furl, ue_mid, ue_sid, ue_sn
     and ue_id. ue_t0 is the timestamp taken when the script runs, ue.d() returns the
     milliseconds elapsed since ue_t0, ue_csm is an alias for window, and ue_sid is the
     session-id cookie value when the pattern matches (undefined otherwise).
     NOTE(review): these names are presumably read by an instrumentation script loaded
     elsewhere on the page; confirm before renaming anything. -->
<script>
if (true === true) {
var ue_t0 = (+ new Date()),
ue_csm = window,
ue = { t0: ue_t0, d: function() { return (+new Date() - ue_t0); } },
ue_furl = "fls-fe.amazon.co.jp",
ue_mid = "A1VC38T7YXB528",
ue_sid = (document.cookie.match(/session-id=([0-9-]+)/) || [])[1],
ue_sn = "opfcaptcha.amazon.co.jp",
ue_id = 'KKTM8F5RHSCN88RHYEX8';
}
</script>
</head>
<body>
<!--
To discuss automated access to Amazon data please contact api-services-support@amazon.com.
For information about migrating to our APIs refer to our Marketplace APIs at https://developer.amazonservices.jp/ref=rm_c_sv, or our Product Advertising API at https://affiliate.amazon.co.jp/gp/advertising/api/detail/main.html/ref=rm_c_ac for advertising use cases.
-->
<!--
Correios.DoNotSend
-->
<div class="a-container a-padding-double-large" style="min-width:350px;padding:44px 0 !important">
<div class="a-row a-spacing-double-large" style="width: 350px; margin: 0 auto">
<div class="a-row a-spacing-medium a-text-center"><i class="a-icon a-logo"></i></div>
<div class="a-box a-alert a-alert-info a-spacing-base">
<div class="a-box-inner">
<i class="a-icon a-icon-alert"></i>
<h4>下に表示されている文字を入力してください</h4>
<p class="a-last">申し訳ありませんが、お客様がロボットでないことを確認させていただく必要があります。最良のかたちでアクセスしていただくために、お使いのブラウザがクッキーを受け入れていることをご確認ください。</p>
</div>
</div>
<div class="a-section">
<div class="a-box a-color-offset-background">
<div class="a-box-inner a-padding-extra-large">
<form method="get" action="/errors/validateCaptcha" name="">
<input type=hidden name="amzn" value="vMTrEHkdsJiaQr9x5UfAgA==" /><input type=hidden name="amzn-r" value="/s?i=hobby&bbn=2189632051&rh=n%3A2277721051%2Cn%3A2277722051%2Cn%3A2189632051%2Cp_n_feature_fifteen_browse-bin%3A3307621051&s=date-desc-rank&page=2&pf_rd_i=2189632051&pf_rd_m=A3P5ROKL5A1OLE&pf_rd_p=cf2542d6-8f93-4f8b-8803-343c480de726&pf_rd_r=6RSZ5NDTY1HYWG4670MK&pf_rd_s=merchandised-search-6&pf_rd_t=101&qid=1563942549&ref=sr_pg_2" />
<div class="a-row a-spacing-large">
<div class="a-box">
<div class="a-box-inner">
<h4>この画像に見える文字を入力してください:</h4>
<div class="a-row a-text-center">
<img src="https://images-na.ssl-images-amazon.com/captcha/qujzzelu/Captcha_lewcclnfpa.jpg">
</div>
<div class="a-row a-spacing-base">
<div class="a-row">
<div class="a-column a-span6">
</div>
<div class="a-column a-span7 a-span-last a-text-right">
<a onclick="window.location.reload()">別の画像にしてください</a>
</div>
</div>
<input autocomplete="off" spellcheck="false" placeholder="文字を入力してください" id="captchacharacters" name="field-keywords" class="a-span12" autocapitalize="off" autocorrect="off" type="text">
</div>
</div>
</div>
</div>
<div class="a-section a-spacing-extra-large">
<div class="a-row">
<span class="a-button a-button-primary a-span12">
<span class="a-button-inner">
<button type="submit" class="a-button-text">ショッピングを続ける</button>
</span>
</span>
</div>
</div>
</form>
</div>
</div>
</div>
</div>
<div class="a-divider a-divider-section"><div class="a-divider-inner"></div></div>
<div class="a-text-center a-spacing-small a-size-mini">
<a href="https://www.amazon.co.jp/gp/help/customer/display.html/ref=footer_cou/376-1267051-7966065?ie=UTF8&nodeId=643006">利用規約</a>
<span class="a-letter-space"></span>
<span class="a-letter-space"></span>
<span class="a-letter-space"></span>
<span class="a-letter-space"></span>
<a href="https://www.amazon.co.jp/gp/help/customer/display.html/ref=footer_privacy/376-1267051-7966065?ie=UTF8&nodeId=643000">プライバシー規約</a>
</div>
<div class="a-text-center a-size-mini a-color-secondary">
© 1996-2013, Amazon.com, Inc. or its affiliates
<!-- Reporting beacon: when scripts run, document.write emits an img element whose URL ends
     in js=1; the noscript fallback requests the same path with js=0 instead. The host name
     in the script is split across string literals and concatenated at run time. Both URLs
     carry the same requestId value. -->
<script>
if (true === true) {
document.write('<img src="https://fls-fe.amaz'+'on.co.jp/'+'1/oc-csi/1/OP/requestId=KKTM8F5RHSCN88RHYEX8&js=1" />');
};
</script>
<noscript>
<img src="https://fls-fe.amazon.co.jp/1/oc-csi/1/OP/requestId=KKTM8F5RHSCN88RHYEX8&js=0" />
</noscript>
</div>
</div>
<!-- Loader: creates a script element and appends it to the first head element. The src is
     the csm-captcha-instrumentation file with the current timestamp inserted between "_V"
     and "_.js", so the requested URL differs on every page load.
     NOTE(review): the changing URL presumably exists to bypass caches; not confirmed here. -->
<script>
if (true === true) {
var elem = document.createElement("script");
elem.src = "https://images-fe.ssl-images-amazon.com/images/G/01/csminstrumentation/csm-captcha-instrumentation.min._V" + (+ new Date()) + "_.js";
document.getElementsByTagName('head')[0].appendChild(elem);
}
</script>
</body></html>