Class: CubeSmart::Crawler

Inherits:
Object
  • Object
show all
Defined in:
lib/cubesmart/crawler.rb

Overview

Used to fetch and parse either HTML or XML via a URL.

Constant Summary collapse

HOST =
'https://www.cubesmart.com'

Class Method Summary collapse

Instance Method Summary collapse

Class Method Details

.html(url:) ⇒ Nokogiri::HTML::Document

Parameters:

  • url (String)

Returns:

  • (Nokogiri::HTML::Document)

Raises:

  • (FetchError)

11
12
13
# File 'lib/cubesmart/crawler.rb', line 11

# Class-level shortcut: builds a fresh crawler and delegates to the instance
# method of the same name.
#
# @param url [String]
# @return [Nokogiri::HTML::Document]
def self.html(url:)
  crawler = new
  crawler.html(url: url)
end

.xml(url:) ⇒ Nokogiri::XML::Document

Parameters:

  • url (String)

Returns:

  • (Nokogiri::XML::Document)

Raises:

  • (FetchError)

18
19
20
# File 'lib/cubesmart/crawler.rb', line 18

# Class-level shortcut: builds a fresh crawler and delegates to the instance
# method of the same name.
#
# @param url [String]
# @return [Nokogiri::XML::Document]
def self.xml(url:)
  crawler = new
  crawler.xml(url: url)
end

Instance Method Details

#connection ⇒ HTTP::Client

Returns:

  • (HTTP::Client)


23
24
25
26
27
28
29
30
31
32
33
34
# File 'lib/cubesmart/crawler.rb', line 23

# Lazily builds the HTTP client used for every request and caches it in
# @connection, so later calls return the same object.
#
# The client always enables the :auto_deflate and :auto_inflate features and
# is persistent against HOST. Headers, timeout and proxy are applied only
# when the corresponding predicate on CubeSmart.config is truthy.
#
# @return [HTTP::Client]
def connection
  return @connection if @connection

  settings = CubeSmart.config

  client = HTTP.use(:auto_deflate).use(:auto_inflate).persistent(HOST)
  client = client.headers(settings.headers) if settings.headers?
  client = client.timeout(settings.timeout) if settings.timeout?
  client = client.via(*settings.via) if settings.proxy?

  @connection = client
end

#fetch(url:) ⇒ HTTP::Response

Parameters:

  • url (String)

Returns:

  • (HTTP::Response)

Raises:

  • (FetchError)

38
39
40
41
42
43
# File 'lib/cubesmart/crawler.rb', line 38

# Issues a GET for +url+ over the shared connection and hands back the
# response when its status reports ok.
#
# @param url [String]
# @return [HTTP::Response]
# @raise [FetchError] when the response status is not ok; the error is built
#   with the url and the flushed response
def fetch(url:)
  reply = connection.get(url)
  return reply if reply.status.ok?

  # The response is flushed only on the failure path, right before it is
  # attached to the error.
  raise FetchError.new(url: url, response: reply.flush)
end

#html(url:) ⇒ Nokogiri::HTML::Document

Parameters:

  • url (String)

Returns:

  • (Nokogiri::HTML::Document)

Raises:

  • (FetchError)

48
49
50
# File 'lib/cubesmart/crawler.rb', line 48

# Fetches +url+ and parses the response body as HTML.
#
# @param url [String]
# @return [Nokogiri::HTML::Document]
def html(url:)
  # The body is coerced with Kernel#String before it is handed to the parser.
  markup = String(fetch(url: url).body)
  Nokogiri::HTML(markup)
end

#xml(url:) ⇒ Nokogiri::XML::Document

Parameters:

  • url (String)

Returns:

  • (Nokogiri::XML::Document)

Raises:

  • (FetchError)

55
56
57
# File 'lib/cubesmart/crawler.rb', line 55

# Fetches +url+ and parses the response body as XML.
#
# @param url [String]
# @return [Nokogiri::XML::Document]
def xml(url:)
  # The body is coerced with Kernel#String before it is handed to the parser.
  payload = String(fetch(url: url).body)
  Nokogiri::XML(payload)
end