Skip to content

Commit

Permalink
NUTCH-3029 Host specific max. and min. intervals in adaptive scheduler
Browse files (browse the repository at this point in the history)
  • Loading branch information
Markus Jelsma committed Mar 13, 2024
1 parent 5ba50c0 commit 84cda2a
Showing 1 changed file with 11 additions and 6 deletions.
17 changes: 11 additions & 6 deletions src/java/org/apache/nutch/crawl/AdaptiveFetchSchedule.java
Original file line number Diff line number Diff line change
Expand Up @@ -189,6 +189,9 @@ private void setHostSpecificIntervals(String fileName,

/**
* Strip a URL, leaving only the host name.
*
* @param url the URL from which to extract the host name
* @return the host name of the given URL
*/
public static String getHostName(String url) throws URISyntaxException {
URI uri = new URI(url);
Expand All @@ -198,9 +201,10 @@ public static String getHostName(String url) throws URISyntaxException {

/**
* Returns the max_interval for this URL, which might depend on the host.
* @param url the URL to be scheduled
* @param defaultMaxInterval the value to which to default
* if max_interval has not been configured for this host
*
* @param url the URL to be scheduled
* @param defaultMaxInterval the value to which to default if max_interval has not been configured for this host
* @return the configured maximum interval or the default interval
*/
public float getMaxInterval(Text url, float defaultMaxInterval){
if (hostSpecificMaxInterval.isEmpty()) {
Expand All @@ -220,9 +224,10 @@ public float getMaxInterval(Text url, float defaultMaxInterval){

/**
* Returns the min_interval for this URL, which might depend on the host.
* @param url the URL to be scheduled
* @param defaultMinInterval the value to which to default
* if min_interval has not been configured for this host
*
* @param url the URL to be scheduled
* @param defaultMinInterval the value to which to default if min_interval has not been configured for this host
* @return the configured minimum interval or the default interval
*/
public float getMinInterval(Text url, float defaultMinInterval){
if (hostSpecificMinInterval.isEmpty()) {
Expand Down

0 comments on commit 84cda2a

Please sign in to comment.