Get Top 10 Starred Repositories of a User on GitHub | Python


We often write scripts in Python to make our tasks easier, so here is a script that gets the top 10 starred repositories of any user on GitHub.

You just need a GitHub username (e.g. msdeep14) to run the script.

Script explanation:

  1. First access the user's repositories URL, for example: username = "msdeep14", then url = "https://github.com/msdeep14?tab=repositories"
  2. Now scrape that page and extract the stars, repository name and repository URL using BeautifulSoup (a minimal sketch of this step follows the list).
  3. One page lists 30 repositories, so if a user has more than 30 repositories, you need a loop to access all the pages.
  4. Use urllib or BeautifulSoup to scrape the page; the code below uses both.
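As an illustration of step 2, here is a minimal sketch that fetches a single repositories page and prints the repository names. It reuses the same CSS class string as the full script below, which reflects an older GitHub page layout and may need updating for the current markup.

# Minimal sketch: fetch one repositories page and list the repository names.
# The CSS class string mirrors the one used in the full script below; it
# reflects an older GitHub layout and may not match the current markup.
import requests
from bs4 import BeautifulSoup

username = "msdeep14"
url = "https://github.com/" + username + "?tab=repositories"

response = requests.get(url)
soup = BeautifulSoup(response.text, 'html.parser')

# each repository is listed inside an <li> with these classes
for li in soup.find_all('li', {'class': 'col-12 d-block width-full py-4 border-bottom public source'}):
    link = li.find('a')  # the first <a> in the entry holds the repository name
    if link is not None:
        print(link.text.strip())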

# Python3 script to get the top 10 starred
# repositories of a user on GitHub

import http.cookiejar
import operator
import re
import urllib.request

import requests
from bs4 import BeautifulSoup
from lxml import html

top_limit = 9


def openWebsite():

    # enter your GitHub username
    username = str(input("enter GitHub username: "))

    # dictionary storing the repository name as key
    # and the number of stars as value
    repo_dict = {}

    # first page of the URL where the user's
    # repositories are listed
    url = "https://github.com/" + username + "?tab=repositories"

    # loop over all pages
    while True:

        """
        See the urllib.request and BeautifulSoup documentation
        for how an HTML page can be scraped to retrieve data:

        urllib.request: https://docs.python.org/3/library/urllib.request.html
        BeautifulSoup: https://www.crummy.com/software/BeautifulSoup/bs4/doc/
        """

        # open the site and load the
        # HTML page into a document
        cj = http.cookiejar.CookieJar()
        opener = urllib.request.build_opener(urllib.request.HTTPCookieProcessor(cj))
        resp = opener.open(url)
        doc = html.fromstring(resp.read())

        # fetch all repository names
        repo_name = doc.xpath('//li[@class="col-12 d-block width-full py-4 border-bottom public source"]/div[@class="d-inline-block mb-1"]/h3/a/text()')

        # list for storing repository names
        repo_list = []

        # get the repository names
        for name in repo_name:
            name = ''.join(''.join(name).split())
            repo_list.append(name)
            repo_dict[name] = 0

        # print(repo_list)
        response = requests.get(url)
        soup = BeautifulSoup(response.text, 'html.parser')

        """
        The path used to get the number of stars; you can
        find it by right-clicking the star count on the
        GitHub page and selecting "Inspect element"
        """
        div = soup.find_all('li', {'class': 'col-12 d-block width-full py-4 border-bottom public source'})

        for d in div:
            temp = d.find_all('div', {'class': 'f6 text-gray mt-2'})
            for t in temp:

                # get the number of stars of a
                # specific repository
                x = t.find_all('a', attrs={'href': re.compile("^/[a-zA-Z0-9-_.]+/[a-zA-Z0-9.-_]+/stargazers")})

                # get the repository URL
                # and fill the dictionary value
                # with the number of stars
                if len(x) != 0:
                    name = x[0].get('href')
                    name = name[len(username) + 2:-11]
                    repo_dict[name] = int(x[0].text)

        # check whether there is a next page
        # with more repositories
        div = soup.find('a', {'class': 'next_page'})

        # print(div)
        if div is not None:
            url = "https://github.com" + div.get('href')
        else:
            # if there is no next page of
            # repositories, break out of the loop
            break

    # get a sorted list of all
    # repositories and print the top 10
    i = 0
    sorted_repo = sorted(iter(repo_dict.items()), key=operator.itemgetter(1))

    # print the sorted repositories in
    # reverse order
    for val in reversed(sorted_repo):
        repo_url = "https://github.com/" + username + "/" + val[0]
        print("repo name:", val[0], "repo url:", repo_url, "stars:", val[1])
        i = i + 1
        if i > top_limit:
            break


# Driver program
if __name__ == "__main__":
    openWebsite()

Output:

enter GitHub username: msdeep14
repo name: DeepDataBase repo url: https://github.com/msdeep14/DeepDataBase stars: 13
repo name: MiniDataBase repo url: https://github.com/msdeep14/MiniDataBase stars: 8
repo name: hackerranksolutions repo url: https://github.com/msdeep14/hackerranksolutions stars: 6
repo name: stayUpdated repo url: https://github.com/msdeep14/stayUpdated stars: 6
repo name: IRCTC repo url: https://github.com/msdeep14/IRCTC stars: 4
repo name: play_2048 repo url: https://github.com/msdeep14/play_2048 stars: 3
repo name: Tripcount repo url: https://github.com/msdeep14/Tripcount stars: 3
repo name: SnapLook repo url: https://github.com/msdeep14/SnapLook stars: 2
repo name: fbFun repo url: https://github.com/msdeep14/fbFun stars: 2
repo name: ByteCode repo url: https://github.com/msdeep14/ByteCode stars: 2


Link to the full repository: trackGitHubStars
