/**
 * Python source for `items.py`: declares `RestScrapeItem`, a `scrapy.Item`
 * with `title`, `address`, `description` and `location` fields.
 *
 * Exported as a string with surrounding whitespace trimmed.
 */
export const pwsSnippet1 = `
# items.py

import scrapy

class RestScrapeItem(scrapy.Item):

  # here we are defining the storage containers for the data we plan to scrape

  title = scrapy.Field()
  address = scrapy.Field()
  description = scrapy.Field()

  # calculated

  location = scrapy.Field()
`.trim()

/**
 * Python source for the spider module: `RestaurantSpider` scrapes
 * restaurant title, address and description from a foodnetwork.com
 * listing page and follows the "next page" link until none is left.
 *
 * Exported as a string with surrounding whitespace trimmed.
 */
export const pwsSnippet2 = `
# __init__.py

import scrapy
from scrapy import Spider
from restScrape.items import RestScrapeItem
from scrapy.selector import Selector

class RestaurantSpider(scrapy.Spider):
   name = 'foodNetwork'
   allowed_domains = ['foodnetwork.com']
   start_urls = ['http://foodnetwork.com/restaurants/shows/diners-drive-ins-and-dives/a-z']

   def parse(self, response):
      restaurantNames = Selector(response).css('div.m-MediaBlock_m-TextWrap')
      for name in restaurantNames:
         item = RestScrapeItem()
         item['title'] = ''.join(name.css('span.m-MediaBlock_a-HeadlineText::text').extract()).strip()
         try: item['address'] = ''.join(name.css('div.m-Info_a-Address::text').extract()).strip()
         except AttributeError: item['address'] = 'n/a'
         item['description'] = ''.join(name.css('div.m-MediaBlock_a-Description p::text').extract()).strip()
         yield item

      # Target pagination items and run spider through subsequent next pages until there is no next page item

      next_page = response.xpath('//li/a[@class="o-pagination_a-Button o-Pagination_a-NextButton"]/@href').extract_first()
      if next_page is not None:
         next_page = response.urljoin(next_page)
         yield scrapy.Request(next_page, callback=self.parse)
`.trim()

/**
 * Python source for `pipelines.py`, containing two Scrapy item pipelines:
 *
 * - `GeoPipeline` looks up each item's `address` through Google's geocoding
 *   endpoint and stores the resulting lat/lon under `location`.
 * - `MongoPipeline` inserts items into a MongoDB collection unless an entry
 *   with the same `title` is already present.
 *
 * Exported as a string with surrounding whitespace trimmed.
 */
export const pwsSnippet3 = `
# pipelines.py

# We will define our item pipelines here
# Don't forget to add your pipeline to the ITEM_PIPELINES setting
# See: https://doc.scrapy.org/en/latest/topics/item-pipeline.html

from scrapy.exceptions import DropItem
from twisted.internet import defer
import logging
import pymongo
import traceback
import treq

API_KEY = 'YOUR_API_KEY'

class GeoPipeline(object):
    # A pipeline that geocodes addresses using Google's API

    # Create a new instance and pass it crawler's stats object
    @classmethod
    def from_crawler(cls, crawler):
        return cls(crawler.stats)

    # Keep a reference to the stats object
    def __init__(self, stats):
        self.stats = stats

    @defer.inlineCallbacks
    def geocode(self, address):
        # This method makes a call to Google's geocoding API. You shouldn't call this more than 5 times per second

        # The url for this API
        endpoint = 'https://maps.googleapis.com/maps/api/geocode/json'

        # Do the call
        parms = [('address', address), ('key', API_KEY)]
        response = yield treq.get(endpoint, params=parms)
        logging.debug(parms)

        # Decode the response as json
        content = yield response.json()
        logging.debug(content)

        # If the status isn't ok, raise an error that includes it
        if content['status'] != 'OK':
            raise Exception('Unexpected status="%s" for address="%s"' %
                            (content['status'], address))

        # Extract the geo-point of the first result
        geo = content['results'][0]["geometry"]["location"]

        # Return the final value
        defer.returnValue({"lat": geo["lat"], "lon": geo["lng"]})

    @defer.inlineCallbacks
    def process_item(self, item, spider):
        # Pipeline's main method. Uses inlineCallbacks to do asynchronous REST requests

        # Set by previous step (spider or pipeline). Don't do anything apart from increasing stats
        if "location" in item:
            self.stats.inc_value('geo_pipeline/already_set')
            defer.returnValue(item)
            return

        # The item has to have the address field set
        assert ("address" in item) and (len(item["address"]) > 0)

        # Geocode the item's address; on failure count the error and log the traceback
        try:
            item["location"] = yield self.geocode(item["address"])
        except Exception:
            self.stats.inc_value('geo_pipeline/errors')
            print(traceback.format_exc())

        # Return the item for the next stage
        defer.returnValue(item)

class MongoPipeline(object):

    collection_name = 'foodnetwork_ddd_restaurants'

    def __init__(self, mongo_uri, mongo_db):
        self.mongo_uri = mongo_uri
        self.mongo_db = mongo_db

    # Pull in information from settings.py
    @classmethod
    def from_crawler(cls, crawler):
        return cls(
            mongo_uri=crawler.settings.get('MONGO_URI'),
            mongo_db=crawler.settings.get('MONGO_DATABASE')
        )

    # Initializing spider & opening db connection
    def open_spider(self, spider):
        self.client = pymongo.MongoClient(self.mongo_uri)
        self.db = self.client[self.mongo_db]

    # Clean up when spider is closed
    def close_spider(self, spider):
        self.client.close()

    # Item processing: insert the restaurant only if its title is not already in the collection
    def process_item(self, item, spider):
        dup_check = self.db[self.collection_name].count_documents({'title': item['title']})
        if dup_check == 0:
            self.db[self.collection_name].insert_one(dict(item))
            logging.debug("Restaurant added to MongoDB database!")
        else:
            logging.debug("Restaurant exists!")
        return item
`.trim()

/**
 * Python source for the Scrapy project's settings module: bot name, spider
 * modules, user agent, robots.txt handling, download delay and concurrency
 * limits, item pipeline order, MongoDB connection values and the
 * AutoThrottle switch.
 *
 * Built from individual lines joined with newlines; the value has no
 * leading or trailing whitespace.
 */
export const pwsSnippet4 = [
  "# Scrapy settings for restScrape project",
  "",
  "BOT_NAME = 'restScrape'",
  "",
  "SPIDER_MODULES = ['restScrape.spiders']",
  "NEWSPIDER_MODULE = 'restScrape.spiders'",
  "",
  "# Crawl responsibly by identifying yourself (and your website) on the user-agent",
  "USER_AGENT = 'IDENTIFY_YOURSELF_HERE'",
  "",
  "# Obey robots.txt rules",
  "ROBOTSTXT_OBEY = True",
  "",
  "...",
  "",
  "# Configure a delay for requests for the same website (default: 0)",
  "# See https://doc.scrapy.org/en/latest/topics/settings.html#download-delay",
  "# See also autothrottle settings and docs",
  "DOWNLOAD_DELAY = 3",
  "RANDOMIZE_DOWNLOAD_DELAY = False",
  "",
  "# The download delay setting will honor only one of:",
  "CONCURRENT_REQUESTS_PER_DOMAIN = 1",
  "CONCURRENT_REQUESTS_PER_IP = 1",
  "",
  "...",
  "",
  "# Configure item pipelines",
  "# See https://doc.scrapy.org/en/latest/topics/item-pipeline.html",
  "ITEM_PIPELINES = {",
  "    'restScrape.pipelines.GeoPipeline': 300,",
  "   'restScrape.pipelines.MongoPipeline': 400,",
  "}",
  "MONGO_URI = 'mongodb://localhost:27017'",
  "MONGO_DATABASE = 'flavortownUSA'",
  "",
  "",
  "# Enable and configure the AutoThrottle extension (disabled by default)",
  "# See https://doc.scrapy.org/en/latest/topics/autothrottle.html",
  "AUTOTHROTTLE_ENABLED = False",
].join("\n")