婷婷综合国产,91蜜桃婷婷狠狠久久综合9色 ,九九九九九精品,国产综合av

主頁 > 知識庫 > Python 統計數據集標簽的類別及數目操作

Python 統計數據集標簽的類別及數目操作

熱門標簽:富錦商家地圖標注 沈陽人工外呼系統價格 武漢外呼系統平臺 沈陽外呼系統呼叫系統 如何申請400電話費用 外呼系統哪些好辦 池州外呼調研線路 江西省地圖標注 沈陽防封電銷卡品牌

看了大神統計voc數據集標簽框的代碼后,針對自己標注的數據集靈活應用,感謝!

看代碼吧~

import re
import os
import xml.etree.ElementTree as ET
# Label names to count; each classN is compared against the <name> text of
# the <object> elements found in the annotation files.
class1 = 'answer'
class2 = 'hand'
class3 = 'write'
class4 = 'music'
class5 = 'phone'
# The bare triple-quoted string below is an expression statement with no
# runtime effect; it is used to keep the remaining class names disabled
# (presumably the rest of the Pascal VOC label set -- confirm before enabling).
'''class6 = 'bus'
class7 = 'car'
class8 = 'cat'
class9 = 'chair'
class10 = 'cow'
class11 = 'diningtable'
class12 = 'dog'
class13 = 'horse'
class14 = 'motorbike'
class15 = 'person'
class16 = 'pottedplant'
class17 = 'sheep'
class18 = 'sofa'
class19 = 'train'
class20 = 'tvmonitor'
'''
annotation_folder = '/home/.../train/'		#change this to the path of your own annotation folder
#annotation_folder = '/home/.../VOC2007/Annotations/'
# NOTE(review): this binds the directory listing to `list`, shadowing the
# builtin for the rest of the module; no live code in view reads it.
list = os.listdir(annotation_folder)
  
def file_name(file_dir):
    """Return the paths of all ``.xml`` files found below *file_dir*.

    The directory tree is walked recursively with ``os.walk``. A file is
    kept only when its extension, as reported by ``os.path.splitext``, is
    exactly ``'.xml'`` (the comparison is case-sensitive).

    Args:
        file_dir: Root directory to search.

    Returns:
        A list of paths, each built by joining the containing directory
        with the file name, in the order ``os.walk`` yields them.
    """
    return [
        os.path.join(parent, entry)
        for parent, _subdirs, entries in os.walk(file_dir)
        for entry in entries
        if os.path.splitext(entry)[1] == '.xml'
    ]
  
# Count, for every tracked class, how many bounding boxes carry that label
# and how many annotation files (images) contain it at least once.
#
# The three lists below are index-aligned: position k always refers to the
# same class. To track more labels, append their names to tracked_classes.
tracked_classes = [class1, class2, class3, class4, class5]
box_counts = [0] * len(tracked_classes)    # objects per class
image_counts = [0] * len(tracked_classes)  # files containing the class

total = 0      # number of tracked-class objects over all files
total_pic = 0  # number of annotation files processed

xml_dirs = file_name(annotation_folder)
for xml_path in xml_dirs:
    print(xml_path)

    # ET.parse opens and closes the file itself and decodes it according to
    # the encoding declared in the XML prolog, instead of leaving an unclosed
    # handle and relying on the locale's default text encoding.
    root = ET.parse(xml_path).getroot()

    total_pic += 1
    classes_in_image = set()  # indices of the classes seen in this file
    for obj in root.findall('object'):
        name_node = obj.find('name')
        if name_node is None:
            # An <object> without a <name> cannot be attributed to a class.
            continue
        label = name_node.text
        # Every tracked class is checked independently, so a label matching
        # several entries is counted once for each of them.
        for idx, class_name in enumerate(tracked_classes):
            if label == class_name:
                box_counts[idx] += 1
                total += 1
                classes_in_image.add(idx)

    # A file adds at most one to a class's image count, however many
    # objects of that class it contains.
    for idx in classes_in_image:
        image_counts[idx] += 1

# One line per class: name, number of files, number of boxes.
for class_name, image_count, box_count in zip(tracked_classes, image_counts, box_counts):
    print(class_name, image_count, box_count)

print("total", total_pic, total)
 

補充:【數據集處理】Python對目標檢測數據集xml文件操作(統計目標種類、數量、面積、比例等修改目標名字)

1. 根據xml文件統計目標種類以及數量

# -*- coding:utf-8 -*-
#根據xml文件統計目標種類以及數量
import os
import xml.etree.ElementTree as ET
import numpy as np
# Current NumPy releases reject NaN as the print threshold (ValueError), so
# use a large finite value to keep arrays from being summarised when printed.
np.set_printoptions(suppress=True, threshold=10000000)
import matplotlib
from PIL import Image
 
def parse_obj(xml_path, filename):
  """Read the object labels from one annotation file.

  The file path is formed by plain concatenation of *xml_path* and
  *filename*, so *xml_path* has to end with a path separator.

  Args:
    xml_path: Folder containing the annotation file.
    filename: Name of the XML file inside that folder.

  Returns:
    A list with one ``{'name': label}`` dict per ``<object>`` element
    directly below the document root, in document order.
  """
  annotation = ET.parse(xml_path + filename)
  return [{'name': node.find('name').text} for node in annotation.findall('object')]
  
def read_image(image_path, filename):
  """Return the width, height and pixel area of an image.

  The file path is formed by plain concatenation of *image_path* and
  *filename*, so *image_path* has to end with a path separator.

  Args:
    image_path: Folder containing the image.
    filename: Name of the image file inside that folder.

  Returns:
    A list ``[W, H, area]`` where ``area`` is ``W * H``.
  """
  # Use the image as a context manager so its file handle is released as
  # soon as the size has been read, instead of staying open per image.
  with Image.open(image_path+filename) as im:
    W, H = im.size
  return [W, H, W*H]
  
if __name__ == '__main__':
    # Annotation folder. It must end with a path separator because
    # parse_obj() builds the file path by plain concatenation.
    xml_path='/home/dlut/網絡/make_database/數據集——合集/VOCdevkit/VOC2018/Annotations/'

    # Only consider annotation files: any other directory entry would be
    # turned into a non-existent "<entry>.xml" path and abort the run.
    xml_files = [entry for entry in os.listdir(xml_path) if entry.endswith('.xml')]

    # label -> number of objects. Dicts keep insertion order, so labels are
    # reported in the order in which they are first encountered.
    num_objs = {}
    for xml_file in xml_files:
        for record in parse_obj(xml_path, xml_file):
            label = record['name']
            num_objs[label] = num_objs.get(label, 0) + 1

    for label, count in num_objs.items():
        print('{}:{}個'.format(label, count))
    print('信息統計算完畢。')

2.根據xml文件統計目標的平均長度、寬度、面積以及每一個目標在原圖中的占比

# -*- coding:utf-8 -*-
#統計
# 計算每一個目標在原圖中的占比
# 計算目標的平均長度、
# 計算平均寬度,
# 計算平均面積、
# 計算目標平均占比
import os
import xml.etree.ElementTree as ET
import numpy as np
#np.set_printoptions(suppress=True, threshold=np.nan)  #10,000,000
np.set_printoptions(suppress=True, threshold=10000000)  #10,000,000
import matplotlib
from PIL import Image
def parse_obj(xml_path, filename):
    """Read the labels and bounding boxes from one annotation file.

    The file path is formed by plain concatenation of *xml_path* and
    *filename*, so *xml_path* has to end with a path separator.

    Args:
        xml_path: Folder containing the annotation file.
        filename: Name of the XML file inside that folder.

    Returns:
        A list with one dict per ``<object>`` element directly below the
        document root. Each dict has the keys ``'name'`` (the label text)
        and ``'bbox'`` (``[xmin, ymin, xmax, ymax]`` converted with ``int``).
    """
    corner_tags = ('xmin', 'ymin', 'xmax', 'ymax')
    annotation = ET.parse(xml_path + filename)
    records = []
    for node in annotation.findall('object'):
        label = node.find('name').text
        box = node.find('bndbox')
        corners = [int(box.find(tag).text) for tag in corner_tags]
        records.append({'name': label, 'bbox': corners})
    return records
def read_image(image_path, filename):
    """Return the width, height and pixel area of an image.

    The file path is formed by plain concatenation of *image_path* and
    *filename*, so *image_path* has to end with a path separator.

    Args:
        image_path: Folder containing the image.
        filename: Name of the image file inside that folder.

    Returns:
        A list ``[W, H, area]`` where ``area`` is ``W * H``.
    """
    # Use the image as a context manager so its file handle is released as
    # soon as the size has been read, instead of staying open per image.
    with Image.open(image_path + filename) as im:
        W, H = im.size
    return [W, H, W * H]
if __name__ == '__main__':
    # Both folders must end with a path separator because parse_obj() and
    # read_image() build file paths by plain concatenation.
    image_path = '/home/dlut/網絡/make_database/數據集——合集/VOCdevkit/VOC2018/JPEGImages/'
    xml_path = '/home/dlut/網絡/make_database/數據集——合集/VOCdevkit/VOC2018/Annotations/'

    # Base names of the annotation files. Entries that are not .xml files
    # are skipped; they would otherwise be turned into non-existent
    # "<entry>.xml" paths and abort the run.
    xml_suffix = '.xml'
    filenames = [entry[:-len(xml_suffix)]
                 for entry in os.listdir(xml_path)
                 if entry.endswith(xml_suffix)]
    print(filenames)

    # Pass 1: load every annotation and the size of its image. The image is
    # looked up under the same base name with a .jpg extension.
    recs = {}
    ims_info = {}
    for name in filenames:
        print('正在處理 {}.xml '.format(name))
        recs[name] = parse_obj(xml_path, name + '.xml')
        print('正在處理 {}.jpg '.format(name))
        ims_info[name] = read_image(image_path, name + '.jpg')
    print('所有信息收集完畢。')
    print('正在處理信息......')

    # Pass 2: per class, collect one row per object:
    # [width, height, area, width ratio, height ratio, area ratio],
    # the ratios being relative to the image the object belongs to.
    # Every object registers its class here; previously only the last
    # object of each image did, so classes could be missing from the report.
    obs_shape = {}
    for name in filenames:
        im_w, im_h, im_area = ims_info[name]
        for record in recs[name]:
            xmin, ymin, xmax, ymax = record['bbox']
            ob_w = xmax - xmin
            ob_h = ymax - ymin
            ob_area = ob_w * ob_h
            obs_shape.setdefault(record['name'], []).append([ob_w,
                                                             ob_h,
                                                             ob_area,
                                                             ob_w / im_w,
                                                             ob_h / im_h,
                                                             ob_area / im_area])

    # Column-wise average over all objects of a class. Dicts keep insertion
    # order, so classes are reported in the order they were first seen.
    obj_avg = {}
    for name, shapes in obs_shape.items():
        obj_avg[name] = np.array(shapes).sum(axis=0) / len(shapes)
        print('{}的情況如下:*******\n'.format(name))
        print('  目標平均W={}'.format(obj_avg[name][0]))
        print('  目標平均H={}'.format(obj_avg[name][1]))
        print('  目標平均area={}'.format(obj_avg[name][2]))
        print('  目標平均與原圖的W比例={}'.format(obj_avg[name][3]))
        print('  目標平均與原圖的H比例={}'.format(obj_avg[name][4]))
        print('  目標平均原圖面積占比={}\n'.format(obj_avg[name][5]))
    print('信息統計計算完畢。')

3.修改xml文件中某個目標的名字為另一個名字

#修改xml文件中的目標的名字,
import os, sys
import glob
from xml.etree import ElementTree as ET
# Batch-rename object labels in every XML annotation file under xml_dir.
# Example of a single annotation path on Windows:
#   C:\Users\rockhuang\Desktop\Annotations\000003.xml
xml_dir = r'/home/dlut/網絡/make_database/數據集——合集/VOCdevkit/VOC2018/Annotations'

# Old label -> new label. Labels not listed here are left untouched.
label_renames = {
    'PinNormal': 'normal bolt',
    'PinDefect': 'defect bolt-1',
}

xml_list = glob.glob(xml_dir + '/*.xml')
for xml_file in xml_list:
    print(xml_file)
    per = ET.parse(xml_file)
    last_name = None  # last <name> element visited in this file, if any
    # 'object' selects the <object> children of the root; the former
    # '/object' absolute path is deprecated on ElementTree.findall.
    for obj in per.findall('object'):
        # Look the <name> element up directly: Element.getchildren() was
        # removed in Python 3.9, and this does not depend on child order.
        name_node = obj.find('name')
        if name_node is None:
            continue
        name_node.text = label_renames.get(name_node.text, name_node.text)
        last_name = name_node
    # The file is rewritten in place even when no label changed.
    per.write(xml_file)
    # Report the last label of the file; skipped when the file has no
    # named object, where the unguarded print used to raise NameError.
    if last_name is not None:
        print(last_name.tag, ':', last_name.text)

修改為:

以上為個人經驗,希望能給大家一個參考,也希望大家多多支持腳本之家。

您可能感興趣的文章:
  • Python統計可散列的對象之容器Counter詳解
  • Python 統計列表中重復元素的個數并返回其索引值的實現方法
  • Python實戰之單詞打卡統計
  • python之cur.fetchall與cur.fetchone提取數據并統計處理操作
  • python自動統計zabbix系統監控覆蓋率的示例代碼
  • python 統計代碼耗時的幾種方法分享
  • Python統計列表元素出現次數的方法示例
  • python統計RGB圖片某像素的個數案例
  • Python jieba 中文分詞與詞頻統計的操作
  • 利用Python3實現統計大量單詞中各字母出現的次數和頻率的方法
  • 使用Python 統計文件夾內所有pdf頁數的小工具
  • python 統計list中各個元素出現的次數的幾種方法
  • python調用百度AI接口實現人流量統計
  • Python代碼覆蓋率統計工具coverage.py用法詳解
  • python 爬蟲基本使用——統計杭電oj題目正確率并排序
  • 利用python匯總統計多張Excel
  • python統計mysql數據量變化并調用接口告警的示例代碼
  • 用python實現監控視頻人數統計

標簽:阿里 通遼 潛江 黑龍江 銅川 常德 株洲 呂梁

巨人網絡通訊聲明:本文標題《Python 統計數據集標簽的類別及數目操作》,本文關鍵詞  Python,統計數據,集,標簽,;如發現本文內容存在版權問題,煩請提供相關信息告之我們,我們將及時溝通與處理。本站內容系統采集于網絡,涉及言論、版權與本站無關。
  • 相關文章
  • 下面列出與本文章《Python 統計數據集標簽的類別及數目操作》相關的同類信息!
  • 本頁收集關于Python 統計數據集標簽的類別及數目操作的相關信息資訊供網民參考!
  • 推薦文章
    主站蜘蛛池模板: 霍州市| 个旧市| 新源县| 温州市| 万山特区| 浦北县| 万山特区| 黎川县| 安西县| 日土县| 治县。| 廊坊市| 五莲县| 牡丹江市| 广南县| 聂拉木县| 荔波县| 曲麻莱县| 襄城县| 丰台区| 和林格尔县| 牡丹江市| 灵宝市| 浏阳市| 剑川县| 新竹市| 绵竹市| 桦甸市| 临汾市| 温宿县| 巴楚县| 濉溪县| 张北县| 夏邑县| 淳化县| 涞源县| 睢宁县| 盐池县| 曲水县| 桐梓县| 满洲里市|