Discussions

Ask a Question
Back to All

Could not scrape data from amazon.co.jp

We could not retrieve the HTML from amazon.co.jp when we tried yesterday.

TargetURL: https://www.amazon.co.jp/s?i=hobby&bbn=2189632051&rh=n%3A2277721051%2Cn%3A2277722051%2Cn%3A2189632051%2Cp_n_feature_fifteen_browse-bin%3A3307621051&s=date-desc-rank&page=155&pf_rd_i=2189632051&pf_rd_m=A3P5ROKL5A1OLE&pf_rd_p=cf2542d6-8f93-4f8b-8803-343c480de726&pf_rd_r=6RSZ5NDTY1HYWG4670MK&pf_rd_s=merchandised-search-6&pf_rd_t=101&qid=1563941970&ref=sr_pg_155

The result of the scrape is shown below:

<!DOCTYPE html>
<!--[if lt IE 7]> <html lang="jp" class="a-no-js a-lt-ie9 a-lt-ie8 a-lt-ie7"> <![endif]-->
<!--[if IE 7]>    <html lang="jp" class="a-no-js a-lt-ie9 a-lt-ie8"> <![endif]-->
<!--[if IE 8]>    <html lang="jp" class="a-no-js a-lt-ie9"> <![endif]-->
<!--[if gt IE 8]><!-->
<html class="a-no-js" lang="jp"><!--<![endif]--><head>
<meta http-equiv="content-type" content="text/html; charset=Shift_JIS">
<meta charset="utf-8">
<meta http-equiv="X-UA-Compatible" content="IE=edge,chrome=1">
<title dir="ltr">Amazon CAPTCHA</title>
<meta name="viewport" content="width=device-width">
<link rel="stylesheet" href="https://images-na.ssl-images-amazon.com/images/G/01/AUIClients/AmazonUI-3c913031596ca78a3768f4e934b1cc02ce238101.secure.min._V1_.css">
<script>

if (true === true) {
    var ue_t0 = (+ new Date()),
        ue_csm = window,
        ue = { t0: ue_t0, d: function() { return (+new Date() - ue_t0); } },
        ue_furl = "fls-fe.amazon.co.jp",
        ue_mid = "A1VC38T7YXB528",
        ue_sid = (document.cookie.match(/session-id=([0-9-]+)/) || [])[1],
        ue_sn = "opfcaptcha.amazon.co.jp",
        ue_id = 'KKTM8F5RHSCN88RHYEX8';
}
</script>
</head>
<body>

<!--
        To discuss automated access to Amazon data please contact api-services-support@amazon.com.
        For information about migrating to our APIs refer to our Marketplace APIs at https://developer.amazonservices.jp/ref=rm_c_sv, or our Product Advertising API at https://affiliate.amazon.co.jp/gp/advertising/api/detail/main.html/ref=rm_c_ac for advertising use cases.
-->

<!--
Correios.DoNotSend
-->

<div class="a-container a-padding-double-large" style="min-width:350px;padding:44px 0 !important">

    <div class="a-row a-spacing-double-large" style="width: 350px; margin: 0 auto">

        <div class="a-row a-spacing-medium a-text-center"><i class="a-icon a-logo"></i></div>

        <div class="a-box a-alert a-alert-info a-spacing-base">
            <div class="a-box-inner">
                <i class="a-icon a-icon-alert"></i>
                <h4>以下に表示されている文字を入力してください</h4>
                <p class="a-last">申し訳ありませんが、お客様がロボットでないことを確認させていただく必要があります。最良のかたちでアクセスしていただくために、お使いのブラウザがクッキーを受け入れていることをご確認ください。</p>
                </div>
            </div>

            <div class="a-section">

                <div class="a-box a-color-offset-background">
                    <div class="a-box-inner a-padding-extra-large">

                        <form method="get" action="/errors/validateCaptcha" name="">
                            <input type=hidden name="amzn" value="vMTrEHkdsJiaQr9x5UfAgA==" /><input type=hidden name="amzn-r" value="&#047;s?i&#061;hobby&amp;bbn&#061;2189632051&amp;rh&#061;n&#037;3A2277721051&#037;2Cn&#037;3A2277722051&#037;2Cn&#037;3A2189632051&#037;2Cp_n_feature_fifteen_browse&#045;bin&#037;3A3307621051&amp;s&#061;date&#045;desc&#045;rank&amp;page&#061;2&amp;pf_rd_i&#061;2189632051&amp;pf_rd_m&#061;A3P5ROKL5A1OLE&amp;pf_rd_p&#061;cf2542d6&#045;8f93&#045;4f8b&#045;8803&#045;343c480de726&amp;pf_rd_r&#061;6RSZ5NDTY1HYWG4670MK&amp;pf_rd_s&#061;merchandised&#045;search&#045;6&amp;pf_rd_t&#061;101&amp;qid&#061;1563942549&amp;ref&#061;sr_pg_2" />
                            <div class="a-row a-spacing-large">
                                <div class="a-box">
                                    <div class="a-box-inner">
                                        <h4>���̉摜�Ɍ����镶������͂��Ă�������:</h4>
                                        <div class="a-row a-text-center">
                                            <img src="https://images-na.ssl-images-amazon.com/captcha/qujzzelu/Captcha_lewcclnfpa.jpg">
                                        </div>
                                        <div class="a-row a-spacing-base">
                                            <div class="a-row">
                                                <div class="a-column a-span6">
                                                </div>
                                                <div class="a-column a-span7 a-span-last a-text-right">
                                                    <a onclick="window.location.reload()">�ʂ̉摜�ɂ��Ă�������</a>
                                                </div>
                                            </div>
                                            <input autocomplete="off" spellcheck="false" placeholder="文字を入力してください" id="captchacharacters" name="field-keywords" class="a-span12" autocapitalize="off" autocorrect="off" type="text">
                                        </div>
                                    </div>
                                </div>
                            </div>

                            <div class="a-section a-spacing-extra-large">

                                <div class="a-row">
                                    <span class="a-button a-button-primary a-span12">
                                        <span class="a-button-inner">
                                            <button type="submit" class="a-button-text">�V���b�s���O�𑱂���</button>
                                        </span>
                                    </span>
                                </div>

                            </div>
                        </form>

                    </div>
                </div>

            </div>

        </div>

        <div class="a-divider a-divider-section"><div class="a-divider-inner"></div></div>

        <div class="a-text-center a-spacing-small a-size-mini">
            <a href="https://www.amazon.co.jp/gp/help/customer/display.html/ref=footer_cou/376-1267051-7966065?ie=UTF8&nodeId=643006">利用規約</a>
            <span class="a-letter-space"></span>
            <span class="a-letter-space"></span>
            <span class="a-letter-space"></span>
            <span class="a-letter-space"></span>
            <a href="https://www.amazon.co.jp/gp/help/customer/display.html/ref=footer_privacy/376-1267051-7966065?ie=UTF8&nodeId=643000">プライバシー規約</a>
        </div>

        <div class="a-text-center a-size-mini a-color-secondary">
          &copy; 1996-2013, Amazon.com, Inc. or its affiliates
          <script>
           if (true === true) {
             document.write('<img src="https://fls-fe.amaz'+'on.co.jp/'+'1/oc-csi/1/OP/requestId=KKTM8F5RHSCN88RHYEX8&js=1" />');
           };
          </script>
          <noscript>
            <img src="https://fls-fe.amazon.co.jp/1/oc-csi/1/OP/requestId=KKTM8F5RHSCN88RHYEX8&js=0" />
          </noscript>
        </div>
    </div>
    <script>
    if (true === true) {
        var elem = document.createElement("script");
        elem.src = "https://images-fe.ssl-images-amazon.com/images/G/01/csminstrumentation/csm-captcha-instrumentation.min._V" + (+ new Date()) + "_.js";
        document.getElementsByTagName('head')[0].appendChild(elem);
    }
    </script>
</body></html>